From 616346b423e739f53621216a90144e6402e16989 Mon Sep 17 00:00:00 2001
From: mdfaijul
Date: Wed, 6 Sep 2023 17:56:01 -0700
Subject: [PATCH 001/478] Enable quantized matmul fusions.

---
 tensorflow/core/kernels/mkl/BUILD             |  20 +
 .../core/kernels/mkl/mkl_fused_ops_test.cc    |   2 +-
 .../core/kernels/mkl/mkl_kernel_util.cc       |   4 +
 tensorflow/core/kernels/mkl/mkl_kernel_util.h |  32 +
 .../core/kernels/mkl/mkl_matmul_op_fused.cc   | 752 ++++++++++++++++--
 .../core/kernels/mkl/mkl_matmul_ops_common.h  | 116 ++-
 tensorflow/core/kernels/mkl/mkl_qmatmul_op.cc |   2 +-
 .../core/kernels/mkl/mkl_qmatmul_op_test.cc   | 425 +++++++---
 .../mkl/onednn_fused_matmul_ops_test.cc       | 749 +++++++++++++++++
 tensorflow/core/ops/mkl_nn_ops.cc             |  40 +
 10 files changed, 1915 insertions(+), 227 deletions(-)
 create mode 100644 tensorflow/core/kernels/mkl/onednn_fused_matmul_ops_test.cc

diff --git a/tensorflow/core/kernels/mkl/BUILD b/tensorflow/core/kernels/mkl/BUILD
index 329139874b101f..4dd5370d281aa2 100644
--- a/tensorflow/core/kernels/mkl/BUILD
+++ b/tensorflow/core/kernels/mkl/BUILD
@@ -95,6 +95,7 @@ tf_mkl_kernel_library(
     hdrs = [
         "mkl_kernel_util.h",
         "mkl_matmul_ops_common.h",
+        "mkl_quantized_conv_ops.h",
    ],
    deps = [
        "//tensorflow/core:graph",
@@ -171,6 +172,7 @@ tf_cc_test_mkl(
    srcs = ["mkl_qmatmul_op_test.cc"],
    linkstatic = 1,  # Fixes dyld error on MacOS.
    deps = [
+        ":mkl_matmul_op",
        ":mkl_qmatmul_op",
        "//tensorflow/core:array_ops_op_lib",
        "//tensorflow/core:math_ops_op_lib",
@@ -554,3 +556,21 @@ tf_cc_test_mkl(
        "//tensorflow/core/kernels/mkl:mkl_softmax_op",
    ] + MKL_TEST_DEPS,
 )
+
+tf_cc_test_mkl(
+    name = "onednn_fused_matmul_ops_test",
+    size = "medium",
+    srcs = ["onednn_fused_matmul_ops_test.cc"],
+    linkstatic = 1,  # Fixes dyld error on MacOS.
+    deps = [
+        ":mkl_matmul_op",
+        ":mkl_kernel_util",
+        "//tensorflow/cc:cc_ops_internal",
+        "//tensorflow/core:direct_session",
+        "//tensorflow/core/kernels:matmul_op",
+        "//tensorflow/core/kernels:relu_op",
+        "//tensorflow/core/kernels:bias_op",
+        "//tensorflow/core/kernels:quantization_utils",
+        "@com_google_absl//absl/strings",
+    ] + MKL_TEST_DEPS,
+)
diff --git a/tensorflow/core/kernels/mkl/mkl_fused_ops_test.cc b/tensorflow/core/kernels/mkl/mkl_fused_ops_test.cc
index 857a25bbe55e7a..6d709794b6fb92 100644
--- a/tensorflow/core/kernels/mkl/mkl_fused_ops_test.cc
+++ b/tensorflow/core/kernels/mkl/mkl_fused_ops_test.cc
@@ -1098,7 +1098,7 @@ class MklFusedMatMulCacheTest : public OpsTestBase {
     // Bias vector.
     AddInputFromArray<float>(TensorShape({4}), {1, 2, 3, 4});
 
-    using KernelType = MklDnnMatMulOpBase<float, float>;
+    using KernelType = MklDnnMatMulOpBase<float, float, float>;
     // Before the first time kernel execution, weight should be empty
     EXPECT_TRUE(static_cast<KernelType*>(this->kernel_.get())
                     ->IsWeightCacheEmpty(this->context_.get()));
diff --git a/tensorflow/core/kernels/mkl/mkl_kernel_util.cc b/tensorflow/core/kernels/mkl/mkl_kernel_util.cc
index a4731ea7888337..02e5d0a578df9c 100644
--- a/tensorflow/core/kernels/mkl/mkl_kernel_util.cc
+++ b/tensorflow/core/kernels/mkl/mkl_kernel_util.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include "tensorflow/cc/ops/array_ops.h"
 #include "tensorflow/cc/ops/const_op.h"
 #include "tensorflow/core/graph/node_builder.h"
+#include "tensorflow/core/lib/core/errors.h"
 
 namespace tensorflow {
 
@@ -40,6 +41,9 @@ void MklTestingUtil::RunMklQuantizeOp(const Tensor& input,
   Node* max_node = test::graph::Constant(&*graph, Tensor(max), "max");
 
   Node* quantize_op;
+  string round_mode =
+      (mode == "SCALED") ? "HALF_TO_EVEN" : "HALF_AWAY_FROM_ZERO";
+
   TF_CHECK_OK(NodeBuilder("mkl_quantizeV2", "_MklQuantizeV2")
                   .Input(input_node)
                   .Input(min_node)
diff --git a/tensorflow/core/kernels/mkl/mkl_kernel_util.h b/tensorflow/core/kernels/mkl/mkl_kernel_util.h
index fb9df4d237c74b..a3015a7b09025c 100644
--- a/tensorflow/core/kernels/mkl/mkl_kernel_util.h
+++ b/tensorflow/core/kernels/mkl/mkl_kernel_util.h
@@ -49,6 +49,38 @@ class MklTestingUtil {
     *tensor_min = min();
     *tensor_max = max();
   }
+
+  // This utility function mimics quantization of a float/bfloat16 tensor
+  // with the oneDNN-backend QuantizeV2 operation. Since the op signature
+  // requires min and max values to be in float type, min_tensor and
+  // max_tensor should have their dtype set to DT_FLOAT.
+  template <typename T>
+  static Status GetQuantizationTensors(const Tensor& input, Tensor* output,
+                                       DataType out_type, const string mode,
+                                       Tensor* min_tensor, Tensor* max_tensor) {
+    if (min_tensor->dtype() != DT_FLOAT || max_tensor->dtype() != DT_FLOAT) {
+      return absl::UnimplementedError("Tensor must be float32.");
+    }
+    T min;
+    T max;
+    ComputeMinMax(input, &min, &max);
+
+    float adjusted_min = static_cast<float>(min);
+    float adjusted_max = static_cast<float>(max);
+    if (mode == "SCALED") {
+      if (output->dtype() != DT_QINT8) {
+        return absl::UnimplementedError("Tensor must be QInt8 in SCALED mode.");
+      }
+      float range = std::max(std::abs(adjusted_min), std::abs(adjusted_max));
+      adjusted_min = -range;
+      adjusted_max = range;
+    }
+    RunMklQuantizeOp(input, adjusted_min, adjusted_max, out_type, mode, output);
+    min_tensor->flat<float>()(0) = adjusted_min;
+    max_tensor->flat<float>()(0) = adjusted_max;
+
+    return OkStatus();
+  }
 };
 
 #ifdef ENABLE_ONEDNN_V3
diff --git a/tensorflow/core/kernels/mkl/mkl_matmul_op_fused.cc b/tensorflow/core/kernels/mkl/mkl_matmul_op_fused.cc
index 2d0065a52e5b4a..7e5544f1c3e3b4 100644
--- a/tensorflow/core/kernels/mkl/mkl_matmul_op_fused.cc
+++ b/tensorflow/core/kernels/mkl/mkl_matmul_op_fused.cc
@@ -1,4 +1,4 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -19,19 +19,29 @@ limitations under the License.
 // Multiplication (MatMul) with bias (BiasAdd) operations.
 
 #if defined(INTEL_MKL)
+#include <string>
+
+#include "oneapi/dnnl/dnnl.hpp"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/kernels/fill_functor.h"
 #include "tensorflow/core/kernels/mkl/mkl_matmul_ops_common.h"
+#include "tensorflow/core/kernels/mkl/mkl_quantized_conv_ops.h"
 #include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/gtl/inlined_vector.h"
+#include "tensorflow/core/platform/errors.h"
 
 namespace tensorflow {
 
 // Fuse Operation
-template <typename Device, typename T, bool native_format = false>
-class MklFusedMatMulOp : public MklDnnMatMulOpBase<T, T> {
+template <typename Device, typename T1, typename T2, typename Tbias,
+          typename Toutput, bool native_format = false>
+class MklFusedMatMulOp : public MklDnnMatMulOpBase<T1, T2, Toutput> {
  public:
   explicit MklFusedMatMulOp(OpKernelConstruction* ctx)
-      : MklDnnMatMulOpBase<T, T>(ctx) {
+      : MklDnnMatMulOpBase<T1, T2, Toutput>(ctx) {
+    if (std::is_same<T2, qint8>::value) {
+      return;  // Quantized version will have its own construction code.
+ } OP_REQUIRES_OK(ctx, ctx->GetAttr("fused_ops", &fused_ops_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("transpose_a", &transpose_a_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("transpose_b", &transpose_b_)); @@ -41,7 +51,6 @@ class MklFusedMatMulOp : public MklDnnMatMulOpBase { OP_REQUIRES_OK( ctx, ctx->GetAttr("is_filter_const", &(this->is_weight_const_))); } - OP_REQUIRES(ctx, fused_ops_.size() <= 2, absl::InvalidArgumentError( "MklFusedMatMul must have 2 post-arguments at most.")); @@ -54,7 +63,7 @@ class MklFusedMatMulOp : public MklDnnMatMulOpBase { ctx, transpose_a_ == false, absl::InvalidArgumentError("In[0] of MklMatMul can't be transposed.")); if (fused_ops_.size() == 2 && fused_ops_[1] == "LeakyRelu") { - OP_REQUIRES_OK(ctx, ctx->GetAttr("leakyrelu_alpha", &leakyrelu_alpha)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("leakyrelu_alpha", &leakyrelu_alpha_)); } } @@ -64,7 +73,7 @@ class MklFusedMatMulOp : public MklDnnMatMulOpBase { const Tensor& weight_tensor = ctx->input(this->kInputIndexWeight); const Tensor& bias_tensor = MklGetInput(ctx, this->kInputIndexBias); - if (std::is_same::value) { + if (std::is_same::value) { (void)SetFPMathMode(); } @@ -134,15 +143,16 @@ class MklFusedMatMulOp : public MklDnnMatMulOpBase { memory::format_tag::nc, this->is_weight_const_); // Extend the basic parameters for data types and fusions. ExtendMklDnnMatMulFwdParams(ctx, matmul_params); - auto st = ExecuteSingleThreadedGemm(batch, channel, k, sizeof(T)); + auto st = ExecuteSingleThreadedGemm(batch, channel, k, sizeof(T1)); // Create the oneDNN wrapper over Eigen threadpool and set max threads // in oneDNN. Eigen::ThreadPoolInterface* eigen_interface = EigenThreadPoolFromTfContext(ctx); tsl::OneDnnThreadPool eigen_tp(eigen_interface, ThreadPoolUseCallerThread(), st ? 1 : -1); - MklDnnMatMulFwdPrimitive* matmul_prim = - MklDnnMatMulFwdPrimitiveFactory::Get(matmul_params, 0); + MklDnnMatMulFwdPrimitive* matmul_prim = + MklDnnMatMulFwdPrimitiveFactory::Get( + matmul_params, 0); // Allocate output tensor. Tensor* dst_tensor = nullptr; @@ -158,17 +168,17 @@ class MklFusedMatMulOp : public MklDnnMatMulOpBase { TensorShape output_tf_shape({batch, channel}); if (fuse_add_) { - const Tensor& add_tensor = MklGetInput(ctx, kInputIndex_Add); + const Tensor& add_tensor = MklGetInput(ctx, input_idx_add_); MklDnnShape add_mkl_shape; - GetMklShape(ctx, kInputIndex_Add, &add_mkl_shape, native_format); + GetMklShape(ctx, input_idx_add_, &add_mkl_shape, native_format); // For native format, we need not to set metadata. - if (native_format && ctx->forward_input_to_output_with_shape( - kInputIndex_Add, kOutputIndex_Dst, - output_tf_shape, &dst_tensor)) { + if (native_format && + ctx->forward_input_to_output_with_shape( + input_idx_add_, kOutputIndex_Dst, output_tf_shape, &dst_tensor)) { ; // Need to do nothing for native format } else if (!native_format && ForwardMklTensorInToOutWithMklShape( - ctx, kInputIndex_Add, kOutputIndex_Dst, + ctx, input_idx_add_, kOutputIndex_Dst, &dst_tensor, output_mkl_shape, false)) { ; // If it's not native format, need to forward and set meta first } else { @@ -182,19 +192,20 @@ class MklFusedMatMulOp : public MklDnnMatMulOpBase { auto add_md = add_mkl_shape.IsMklTensor() ? 
add_mkl_shape.GetMklLayout() - : memory::desc(dst_dims, MklDnnType(), output_format_tag); + : memory::desc(dst_dims, MklDnnType(), output_format_tag); auto dst_md = - memory::desc(dst_dims, MklDnnType(), output_format_tag); + memory::desc(dst_dims, MklDnnType(), output_format_tag); void* add_buf = - static_cast(const_cast(add_tensor.flat().data())); - void* dst_buf = static_cast((dst_tensor)->flat().data()); + static_cast(const_cast(add_tensor.flat().data())); + void* dst_buf = + static_cast((dst_tensor)->flat().data()); if (native_format) { // We are simply deep copying the add_tensor to dst_tensor without // changing memory layout, hence using same memory descriptor. add_md = dst_md = - memory::desc({add_tensor.NumElements()}, MklDnnType(), + memory::desc({add_tensor.NumElements()}, MklDnnType(), dnnl::memory::format_tag::x); } @@ -218,31 +229,33 @@ class MklFusedMatMulOp : public MklDnnMatMulOpBase { try { // Prepare the input and output for primitive. - T* src_data = const_cast(src_tensor.flat().data()); - T* weight_data = const_cast(weight_tensor.flat().data()); - T* bias_data = const_cast(bias_tensor.flat().data()); - T* dst_data = const_cast(dst_tensor->flat().data()); + T1* src_data = const_cast(src_tensor.flat().data()); + T2* weight_data = const_cast(weight_tensor.flat().data()); + void* bias_data = static_cast( + const_cast(bias_tensor.flat().data())); + Toutput* dst_data = + const_cast(dst_tensor->flat().data()); // Reorder input if necessary. - MklDnnData src_mkl(&(this->cpu_engine_)); - MklDnnData weight_mkl(&(this->cpu_engine_)); + MklDnnData src_mkl(&(this->cpu_engine_)); + MklDnnData weight_mkl(&(this->cpu_engine_)); auto src_md = src_mkl_shape.IsMklTensor() ? src_mkl_shape.GetMklLayout() - : memory::desc(src_dims, MklDnnType(), src_format); + : memory::desc(src_dims, MklDnnType(), src_format); if (src_md != matmul_pd->src_desc()) { src_mkl.SetUsrMem(src_md, src_data); src_mkl.CheckReorderToOpMem(matmul_pd.get()->src_desc(), this->cpu_engine_, ctx); - src_data = reinterpret_cast(src_mkl.GetOpMem().get_data_handle()); + src_data = static_cast(src_mkl.GetOpMem().get_data_handle()); } // Get cached data when weight is const. const memory::desc weight_md = - memory::desc(weight_dims, MklDnnType(), weight_format); + memory::desc(weight_dims, MklDnnType(), weight_format); if (weight_md != matmul_pd->weights_desc()) { - T* cached_weight_data = nullptr; + T2* cached_weight_data = nullptr; if (this->is_weight_const_) { // TODO(intel-tf): When oneDNN major version changes to v4.x, weight @@ -268,16 +281,22 @@ class MklFusedMatMulOp : public MklDnnMatMulOpBase { weight_mkl.CheckReorderToOpMem(matmul_pd.get()->weights_desc(), this->cpu_engine_, ctx); weight_data = - reinterpret_cast(weight_mkl.GetOpMem().get_data_handle()); + static_cast(weight_mkl.GetOpMem().get_data_handle()); } } std::shared_ptr cpu_stream; - cpu_stream.reset(CreateStream(&eigen_tp, matmul_prim->GetEngine())); UserScratchPad scratch_pad; scratch_pad.AllocateSPTensor(matmul_prim, ctx); + // Temporary tensor for scaled bias when op is quantized version. + Tensor temp_scaled_bias_tensor; + if (std::is_same::value) { + this->GetScaledBias(ctx, matmul_pd, bias_tensor, + &temp_scaled_bias_tensor, &bias_data); + } + // Execute fused matmul op. 
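+      // (If GetScaledBias produced a scaled copy above, bias_data now points
+      // at temp_scaled_bias_tensor rather than at the original bias input.)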
matmul_prim->Execute(src_data, weight_data, bias_data, dst_data, matmul_params, scratch_pad.Get(), cpu_stream); @@ -290,30 +309,31 @@ class MklFusedMatMulOp : public MklDnnMatMulOpBase { } } - void ExtendMklDnnMatMulFwdParams(OpKernelContext* ctx, - MklDnnMatMulFwdParams& params) { + virtual void ExtendMklDnnMatMulFwdParams(OpKernelContext* ctx, + MklDnnMatMulFwdParams& params) { + // Create a string from data types of input, weight, bias, and output. + params.dtypes.append(typeid(T1).name()); + params.dtypes.append(typeid(T2).name()); + params.dtypes.append(typeid(Tbias).name()); + params.dtypes.append(typeid(Toutput).name()); if (fused_ops_.size() == 2) { string post_op = fused_ops_[1]; - - if (post_op == "Relu") { - params.post_op_params.push_back({"relu", {1.0, 0.0, 0.0}}); - } else if (post_op == "Relu6") { - params.post_op_params.push_back({"relu6", {1.0, 6.0, 0.0}}); + float scale = 1.0f; + float alpha = 0.0f; + float beta = 0.0f; + if (post_op == "Relu6") { + alpha = 6.0f; + } else if (post_op == "LeakyRelu") { + alpha = leakyrelu_alpha_; } else if (post_op == "Elu") { - params.post_op_params.push_back({"elu", {1.0, 1.0, 0.0}}); - } else if (post_op == "GeluApproximate") { - params.post_op_params.push_back({"gelu_approximate", {1.0, 1.0, 0.0}}); - } else if (post_op == "GeluExact") { - params.post_op_params.push_back({"gelu_exact", {1.0, 1.0, 0.0}}); - } else if (post_op == "Tanh") { - params.post_op_params.push_back({"tanh", {1.0, 0.0, 0.0}}); + alpha = 1.0f; + } + if (post_op == "Relu" || post_op == "Relu6" || post_op == "LeakyRelu" || + post_op == "Elu" || post_op == "GeluApproximate" || + post_op == "GeluExact" || post_op == "Tanh" || post_op == "Sigmoid") { + params.post_op_params.push_back({post_op, {scale, alpha, beta}}); } else if (post_op == "Add") { params.post_op_params.push_back({"sum", {1.0}}); - } else if (post_op == "LeakyRelu") { - params.post_op_params.push_back( - {"leakyrelu", {1.0, leakyrelu_alpha, 0.0}}); - } else if (post_op == "Sigmoid") { - params.post_op_params.push_back({"logistic", {1.0, 0.0, 0.0}}); } else { OP_REQUIRES_OK(ctx, absl::InvalidArgumentError(absl::StrCat( "Unsupported post-argument in MklFusedMatMul: ", @@ -322,34 +342,630 @@ class MklFusedMatMulOp : public MklDnnMatMulOpBase { } } - private: + protected: + virtual void GetScaledBias( + OpKernelContext*, + std::shared_ptr&, + const Tensor&, Tensor*, void**) {} + bool fuse_add_ = false; bool transpose_a_; bool transpose_b_; - float leakyrelu_alpha = 0.2; + float leakyrelu_alpha_ = 0.2; std::vector fused_ops_; - const int kInputIndex_Add = 3; + int input_idx_add_ = 3; const int kOutputIndex_Dst = 0; -}; // namespace tensorflow +#ifdef DNNL_AARCH64_USE_ACL + const int kWeightTensorHashLength = 1024; +#endif +}; + +namespace { + +enum class FusedComputationType { + kUndefined, + kBiasAdd, + kBiasAdd_Dequantize, + kBiasAdd_Requantize, + kBiasAdd_Activation, + kBiasAdd_Activation_Dequantize, + kBiasAdd_Activation_Requantize, + kBiasAdd_Add, + kBiasAdd_Add_Dequantize, + kBiasAdd_Add_Requantize, +}; + +struct FusedComputationPattern { + FusedComputationType fused_computation; + std::vector fused_ops; +}; + +} // namespace + +// OneDNN uses post-ops to implement different kind of fusions. The category of +// each individual post-op can be inferred from the fused_ops attribute. The +// following enum is used to identify list of required post-ops. 
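+// Illustrative mapping (see Initialize below): the canonicalized fused_ops
+// {"BiasAdd", "Activation", "Requantize"} yields the post-op list
+// {kOutputScale, kActivation, kLinear}; BiasAdd itself is handled by the
+// inner-product primitive's bias input rather than by a post-op.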
+enum class PostOpKind { kActivation, kSum, kOutputScale, kLinear }; + +template +class QuantizedFusedMatMulOp + : public MklFusedMatMulOp { + protected: + string input_quant_mode_; // 0-th input + string output_quant_mode_; // 0-th output + string activation_type_; // Activation op type + + // Initialize minmax tensor indices with default values for the most common + // cases. + int input_min_idx_ = 3; + int input_max_idx_ = 4; + int weight_min_idx_ = 5; + int weight_max_idx_ = 6; + + struct PostOpInfo { + PostOpKind post_op_kind; + struct OperandInfo { + int idx = -1; // Operand tensor index if needed by a post-op. + // Indices of min and max value tensors, if the operand is quantized. + gtl::InlinedVector min_max_indices; + } operand_info; + // Indices of output min and max value tensors. It is used when requantize + // is fused. + gtl::InlinedVector min_max_indices; + }; + + gtl::InlinedVector post_op_info_list_; + + void Initialize(OpKernelConstruction* context) { + OP_REQUIRES_OK(context, + context->GetAttr("transpose_a", &this->transpose_a_)); + OP_REQUIRES_OK(context, + context->GetAttr("transpose_b", &this->transpose_b_)); + OP_REQUIRES_OK(context, + context->GetAttr("input_quant_mode", &input_quant_mode_)); + OP_REQUIRES_OK(context, + context->GetAttr("output_quant_mode", &output_quant_mode_)); + OP_REQUIRES_OK( + context, context->GetAttr("is_weight_const", &this->is_weight_const_)); + OP_REQUIRES_OK(context, + context->GetAttr("is_bias_const", &this->is_bias_const_)); + if (context->HasAttr("leakyrelu_alpha")) { + OP_REQUIRES_OK(context, context->GetAttr("leakyrelu_alpha", + &this->leakyrelu_alpha_)); + } + + // Extract activation info and canonicalize activation types to + // common name "Activation" in the fused_ops attribute. + std::vector fused_ops; + OP_REQUIRES_OK(context, context->GetAttr("fused_ops", &fused_ops)); + for (auto it = fused_ops.begin(); it != fused_ops.end(); ++it) { + if (*it == "Relu" || *it == "Relu6" || *it == "Elu" || + *it == "GeluApproximate" || *it == "GeluExact" || *it == "Tanh" || + *it == "LeakyRelu" || *it == "Sigmoid") { + if (*it != "Relu") { + string last_fusion = fused_ops.back(); + OP_REQUIRES( + context, + (last_fusion == "Dequantize" || last_fusion == "Requantize"), + absl::UnimplementedError(absl::StrCat( + "Nonlinear activation except Relu can be ", + "supported only with Dequantize or Requantize fusion."))); + } + activation_type_ = *it; + // Canonicalize all activation types into "Activation" for simplifying + // post ops construction. + *it = "Activation"; + } + } + + using FCT = FusedComputationType; + + // TODO(intel-tf): Add more patterns when implemented. + std::vector patterns{ + {FCT::kBiasAdd, {"BiasAdd"}}, + {FCT::kBiasAdd_Dequantize, {"BiasAdd", "Dequantize"}}, + {FCT::kBiasAdd_Requantize, {"BiasAdd", "Requantize"}}, + {FCT::kBiasAdd_Activation, {"BiasAdd", "Activation"}}, + {FCT::kBiasAdd_Activation_Dequantize, + {"BiasAdd", "Activation", "Dequantize"}}, + {FCT::kBiasAdd_Activation_Requantize, + {"BiasAdd", "Activation", "Requantize"}}, + {FCT::kBiasAdd_Add_Dequantize, {"BiasAdd", "Add", "Dequantize"}}, + }; + + FusedComputationType fused_computation = FusedComputationType::kUndefined; + for (const auto& pattern : patterns) { + if (fused_ops == pattern.fused_ops) { + fused_computation = pattern.fused_computation; + break; + } + } + + // Configure oneDNN post ops + switch (fused_computation) { + case FCT::kBiasAdd: + // No post op is required. 
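+        // With only BiasAdd fused, the result is the raw int32 accumulator
+        // plus bias, which is why the check below requires a qint32 output.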
+        OP_REQUIRES(context, (std::is_same<Toutput, qint32>::value),
+                    absl::UnimplementedError(absl::StrCat(
+                        "Quantized fusion: [", absl::StrJoin(fused_ops, ","),
+                        "] needs output in qint32.")));
+        break;
+      case FCT::kBiasAdd_Dequantize:
+        post_op_info_list_ = {{PostOpKind::kOutputScale, {}, {}}};
+        break;
+      case FCT::kBiasAdd_Requantize:
+        post_op_info_list_ = {{PostOpKind::kOutputScale, {}, {}},
+                              {PostOpKind::kLinear, {}, {7, 8}}};
+        break;
+      case FCT::kBiasAdd_Activation:
+        OP_REQUIRES(context,
+                    (std::is_same<Toutput, qint32>::value &&
+                     activation_type_ == "Relu"),
+                    absl::UnimplementedError(absl::StrCat(
+                        "Quantized fusion: [", absl::StrJoin(fused_ops, ","),
+                        "] needs output in qint32 and ",
+                        "activation supported is only Relu")));
+        post_op_info_list_ = {{PostOpKind::kActivation, {}, {}}};
+        break;
+      case FCT::kBiasAdd_Activation_Dequantize:
+        post_op_info_list_ = {{PostOpKind::kOutputScale, {}, {}},
+                              {PostOpKind::kActivation, {}, {}}};
+        break;
+      case FCT::kBiasAdd_Activation_Requantize:
+        post_op_info_list_ = {{PostOpKind::kOutputScale, {}, {}},
+                              {PostOpKind::kActivation, {}, {}},
+                              {PostOpKind::kLinear, {}, {7, 8}}};
+        break;
+      case FCT::kBiasAdd_Add_Dequantize: {
+        OP_REQUIRES(
+            context,
+            (std::is_same<U, float>::value || std::is_same<U, bfloat16>::value),
+            absl::UnimplementedError(
+                "Quantized addend tensor is not implemented yet."));
+        // Addend tensor precedes all minmax tensors. Shift the indices from
+        // their default-initialized values.
+        input_min_idx_ += 1;
+        input_max_idx_ += 1;
+        weight_min_idx_ += 1;
+        weight_max_idx_ += 1;
+        post_op_info_list_ = {{PostOpKind::kOutputScale, {}, {}},
+                              {PostOpKind::kSum, {3, {}}, {}}};
+      } break;
+      default:
+        OP_REQUIRES(context, false,
+                    absl::UnimplementedError(
+                        absl::StrCat("Fusion is not implemented: [",
+                                     absl::StrJoin(fused_ops, ","), "]")));
+    }
+  }
+
+ public:
+  explicit QuantizedFusedMatMulOp(OpKernelConstruction* context)
+      : MklFusedMatMulOp<Device, T1, T2, Tbias, Toutput, native_format>(
+            context) {
+    Initialize(context);
+  }
+
+  void Compute(OpKernelContext* ctx) override {
+    MklFusedMatMulOp<Device, T1, T2, Tbias, Toutput, native_format>::Compute(
+        ctx);
+    // Compute additional outputs
+    if (std::is_same<Toutput, qint8>::value ||
+        std::is_same<Toutput, quint8>::value ||
+        std::is_same<Toutput, qint32>::value) {
+      Tensor* min_output = nullptr;
+      Tensor* max_output = nullptr;
+
+      const float min_input = ctx->input(input_min_idx_).flat<float>()(0);
+      const float max_input = ctx->input(input_max_idx_).flat<float>()(0);
+      const Tensor& min_weight = ctx->input(weight_min_idx_);
+      const Tensor& max_weight = ctx->input(weight_max_idx_);
+      OP_REQUIRES(ctx, min_weight.shape() == max_weight.shape(),
+                  absl::InvalidArgumentError(
+                      "Shape of min-weight and max-weight must be same."));
+
+      if (std::is_same<Toutput, qint32>::value) {
+        TensorShape output_minmax_shape = min_weight.shape();
+        OP_REQUIRES_OK(
+            ctx, ctx->allocate_output(1, output_minmax_shape, &min_output));
+        OP_REQUIRES_OK(
+            ctx, ctx->allocate_output(2, output_minmax_shape, &max_output));
+        if (min_weight.dims() == 0) {
+          float min_output_value;
+          float max_output_value;
+          MklQuantizationRangeForMultiplication<T1, T2, qint32>(
+              min_input, max_input, min_weight.flat<float>()(0),
+              max_weight.flat<float>()(0), &min_output_value,
+              &max_output_value);
+          min_output->flat<float>()(0) = min_output_value;
+          max_output->flat<float>()(0) = max_output_value;
+        } else {
+          MklQuantizationRangeForMultiplication<T1, T2, qint32>(
+              min_input, max_input, min_weight, max_weight, &min_output,
+              &max_output);
+        }
+      } else {
+        // When output type is qint8 or quint8, the kernel is registered for
+        // Requantize fusion.
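+        // Illustrative example (SCALED mode, qint8 output): a requested
+        // output range of [-6.0, 6.0] yields range_output = 6.0, so the
+        // reported min/max become -6.0 and 6.0; for quint8 they become
+        // 0 and 6.0 instead.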
+ OP_REQUIRES_OK(ctx, ctx->allocate_output(1, {}, &min_output)); + OP_REQUIRES_OK(ctx, ctx->allocate_output(2, {}, &max_output)); + int output_min_idx = ctx->num_inputs() - 2; + int output_max_idx = ctx->num_inputs() - 1; + const float requested_min = ctx->input(output_min_idx).flat()(0); + const float requested_max = ctx->input(output_max_idx).flat()(0); + if (output_quant_mode_ == "SCALED") { + const float range_output = + std::max(std::abs(requested_min), std::abs(requested_max)); + if (std::is_same::value) { + min_output->flat()(0) = -range_output; + max_output->flat()(0) = range_output; + } else { + min_output->flat()(0) = 0; + max_output->flat()(0) = range_output; + } + } else { + min_output->flat()(0) = requested_min; + max_output->flat()(0) = requested_max; + } + } + } else if (std::is_same::value || + std::is_same::value) { + // Kernel is registered for Dequantization fusion. Nothing to do. + } else { + OP_REQUIRES_OK(ctx, + absl::InvalidArgumentError("Unsupported output type.")); + } + } + + void ExtendMklDnnMatMulFwdParams(OpKernelContext* ctx, + MklDnnMatMulFwdParams& params) override { + // Create a string from data types of input, weight, bias, and output. + params.dtypes.append(typeid(T1).name()); + params.dtypes.append(typeid(T2).name()); + params.dtypes.append(typeid(Tbias).name()); + params.dtypes.append(typeid(Toutput).name()); + + params.input_quant_mode = input_quant_mode_; + + for (const auto& post_op_info : post_op_info_list_) { + auto post_op_kind = post_op_info.post_op_kind; + switch (post_op_kind) { + case PostOpKind::kOutputScale: { + if constexpr (std::is_same::value) { + // No scaling is required. + break; + } + const float min_input = ctx->input(input_min_idx_).flat()(0); + const float max_input = ctx->input(input_max_idx_).flat()(0); + const Tensor& min_weight_tensor = ctx->input(weight_min_idx_); + const Tensor& max_weight_tensor = ctx->input(weight_max_idx_); + const float* min_weight = min_weight_tensor.flat().data(); + const float* max_weight = max_weight_tensor.flat().data(); + const size_t num_weight_scales = min_weight_tensor.NumElements(); + + const float max_int8_input = + (std::is_same::value) ? 255.0f : 127.0f; + const float max_int8_weight = + (std::is_same::value) ? 255.0f : 127.0f; + const float range_input = + (input_quant_mode_ == "MIN_FIRST") + ? 
max_input - min_input + : std::max(std::abs(min_input), std::abs(max_input)); + + const float src_scale = range_input / max_int8_input; + std::vector wei_scales(num_weight_scales); +#ifndef ENABLE_ONEDNN_V3 + std::vector output_scales(num_weight_scales); +#endif // ENABLE_ONEDNN_V3 + for (size_t i = 0; i < num_weight_scales; ++i) { + float range_weight = + std::max(std::abs(min_weight[i]), std::abs(max_weight[i])); + wei_scales[i] = range_weight / max_int8_weight; +#ifndef ENABLE_ONEDNN_V3 + output_scales[i] = src_scale * wei_scales[i]; +#endif // ENABLE_ONEDNN_V3 + } + FactoryKeyCreator src_partial_key; + src_partial_key.AddAsKey(min_input); + src_partial_key.AddAsKey(max_input); + + FactoryKeyCreator wei_partial_key; + wei_partial_key.AddAsKey(min_weight); + wei_partial_key.AddAsKey(max_weight); +#ifndef ENABLE_ONEDNN_V3 + FactoryKeyCreator output_scales_partial_key; + output_scales_partial_key.AddAsKey(src_partial_key.GetKey()); + output_scales_partial_key.AddAsKey(wei_partial_key.GetKey()); + params.post_op_params.push_back({"output_scale", output_scales, + output_scales_partial_key.GetKey()}); +#else + params.post_op_params.push_back( + {"src_scale", {src_scale}, src_partial_key.GetKey()}); + params.post_op_params.push_back( + {"wei_scale", wei_scales, wei_partial_key.GetKey()}); +#endif // ENABLE_ONEDNN_V3 + } break; + + case PostOpKind::kActivation: { + float scale = 1.0f; + float alpha = 0.0f; + float beta = 0.0f; + if (activation_type_ == "LeakyRelu") + alpha = this->leakyrelu_alpha_; + else if (activation_type_ == "Relu6") + alpha = 6.0f; + else if (activation_type_ == "Elu") + alpha = 1.0f; + params.post_op_params.push_back( + {activation_type_, {scale, alpha, beta}}); + } break; + + case PostOpKind::kLinear: { + // Update output_scale for requantize fusion. + auto output_min_idx = post_op_info.min_max_indices[0]; + auto output_max_idx = post_op_info.min_max_indices[1]; + const float min_output = + ctx->input(output_min_idx).template flat()(0); + const float max_output = + ctx->input(output_max_idx).template flat()(0); + const float max_int8_output = + (std::is_same::value) ? 255.0f : 127.0f; + const float range_output = + (output_quant_mode_ == "MIN_FIRST") + ? max_output - min_output + : std::max(std::abs(min_output), std::abs(max_output)); + float req_scale = max_int8_output / range_output; + float req_shift = 0.0f; + if (output_quant_mode_ == "MIN_FIRST") { + req_shift = -min_output * max_int8_output / range_output; + } + params.post_op_params.push_back( + {"linear", {1.0, req_scale, req_shift}}); + } break; + + case PostOpKind::kSum: { + this->fuse_add_ = true; + this->input_idx_add_ = post_op_info.operand_info.idx; + params.post_op_params.push_back({"sum", {1.0}}); + } break; + + default: + OP_REQUIRES_OK( + ctx, absl::InvalidArgumentError("Unsupported post-op-kind.")); + } + } + } + + void GetScaledBias( + OpKernelContext* ctx, + std::shared_ptr& matmul_pd, + const Tensor& bias_tensor, Tensor* temp_scaled_bias_tensor, + void** bias_data) override { +#ifdef ENABLE_ONEDNN_V3 +#define TSCALED_BIAS float +#else +#define TSCALED_BIAS Tbias +#endif // ENABLE_ONEDNN_V3 + +#ifndef ENABLE_ONEDNN_V3 + if (std::is_same::value) { + // Bias already has been scaled for quantized input and weight. 
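+      // (Pre-oneDNN-v3 path: a qint32 bias is assumed to arrive already
+      // scaled by the input and weight quantization scales, so it is used
+      // as-is.)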
+#else + if ((std::is_same::value || + std::is_same::value) && + input_quant_mode_ == "SCALED") { +#endif // !ENABLE_ONEDNN_V3 + return; + } else { + const float min_input = ctx->input(input_min_idx_).flat()(0); + const float max_input = ctx->input(input_max_idx_).flat()(0); + const Tensor& min_weight_tensor = ctx->input(weight_min_idx_); + const Tensor& max_weight_tensor = ctx->input(weight_max_idx_); + const float* min_weight = min_weight_tensor.flat().data(); + const float* max_weight = max_weight_tensor.flat().data(); + bool is_cached_bias_valid = false; + bool is_bias_cache_empty = this->IsBiasCacheEmpty(); + if (!is_bias_cache_empty) { + this->GetCachedBias(min_input, max_input, bias_data); + is_cached_bias_valid = (*bias_data != nullptr); + } + if (!is_cached_bias_valid) { + void* input_bias_buf = static_cast( + const_cast(bias_tensor.flat().data())); + auto scaled_bias_md = matmul_pd->bias_desc(); + TensorShape scaled_bias_shape; + scaled_bias_shape.AddDim((scaled_bias_md.get_size() / sizeof(float))); + OP_REQUIRES_OK(ctx, ctx->allocate_temp( + DataTypeToEnum::v(), + scaled_bias_shape, temp_scaled_bias_tensor)); + void* scaled_bias_buf = static_cast( + temp_scaled_bias_tensor->flat().data()); + + const float max_int8_input = + (std::is_same::value) ? 255.0f : 127.0f; + const float max_int8_weight = + (std::is_same::value) ? 255.0f : 127.0f; + const float range_input = + (input_quant_mode_ == "MIN_FIRST") + ? max_input - min_input + : std::max(std::abs(min_input), std::abs(max_input)); + const size_t num_weight_scales = min_weight_tensor.NumElements(); + std::vector bias_scales(num_weight_scales, 1.0); + for (size_t i = 0; i < num_weight_scales; ++i) { + float range_weight = + std::max(std::abs(min_weight[i]), std::abs(max_weight[i])); + float scale_factor = + (max_int8_input * max_int8_weight) / (range_input * range_weight); + bias_scales[i] = scale_factor; + } + if (input_quant_mode_ == "MIN_FIRST") { + Tbias* input_bias = (Tbias*)input_bias_buf; + TSCALED_BIAS* adjusted_bias = (TSCALED_BIAS*)scaled_bias_buf; + float q_min_input = max_int8_input * min_input / range_input; + const Tensor& weight_tensor = ctx->input(1); + int stride_ic = 1; + int stride_oc = 1; + int k = 0; + int n = 0; + if (this->transpose_b_) { + k = weight_tensor.dim_size(1); + n = weight_tensor.dim_size(0); + stride_ic = 1; + stride_oc = k; + } else { + k = weight_tensor.dim_size(0); + n = weight_tensor.dim_size(1); + stride_ic = n; + stride_oc = 1; + } + T2* weight_buf = const_cast(weight_tensor.flat().data()); + std::vector scales(n); + if (num_weight_scales == 1) { + // Weights are quantized per_tensor. Scales need to be expanded to + // number of output channels. + std::fill(scales.begin(), scales.end(), bias_scales[0]); + } else { + scales = bias_scales; + } + // TODO(intel-tf): Paralellize loop for large weights. + for (int j = 0; j < n; ++j) { + int sum = 0; + for (int i = 0; i < k; ++i) { + sum += weight_buf[i * stride_ic + j * stride_oc]; + } +#ifndef ENABLE_ONEDNN_V3 + adjusted_bias[j] = static_cast( + (static_cast(input_bias[j]) * scales[j]) + + (sum * q_min_input)); +#else + // TODO(intel-tf): Use zeropoint for quantized input tensor instead + // of manual adjustments. + if (std::is_same::value) { + // Starting with oneDNN v3.0, bias is expected to be dequantized + // to float32. + adjusted_bias[j] = static_cast(input_bias[j]) / scales[j]; + } else { + // Bias is float32 or bfloat16 but still needs to be compensated. 
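+            // The second term folds the MIN_FIRST zero-point contribution
+            // (the column sum of the weights times q_min_input, scaled back
+            // by 1 / scales[j]) into the dequantized bias.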
+ adjusted_bias[j] = static_cast(input_bias[j]) + + ((sum * q_min_input) / scales[j]); + } +#endif // !ENABLE_ONEDNN_V3 + } + } else { + memory::dims input_bias_dims = + memory::dims({bias_tensor.shape().dim_size(0)}); + auto input_bias_md = dnnl::memory::desc( + input_bias_dims, MklDnnType(), memory::format_tag::x); + auto input_bias_mem = + dnnl::memory(input_bias_md, this->cpu_engine_, input_bias_buf); + auto scaled_bias_mem = + dnnl::memory(scaled_bias_md, this->cpu_engine_, scaled_bias_buf); + dnnl::primitive_attr bias_attr; +#ifndef ENABLE_ONEDNN_V3 + (num_weight_scales == 1) + ? bias_attr.set_output_scales(0, bias_scales) + : bias_attr.set_output_scales(1, bias_scales); +#else + (num_weight_scales == 1) ? bias_attr.set_scales_mask(DNNL_ARG_SRC, 0) + : bias_attr.set_scales_mask(DNNL_ARG_SRC, 1); +#endif // !ENABLE_ONEDNN_V3 + auto reorder_prim = + dnnl::reorder(input_bias_mem, scaled_bias_mem, bias_attr); + std::unordered_map reorder_net_args = { + {DNNL_ARG_FROM, input_bias_mem}, {DNNL_ARG_TO, scaled_bias_mem}}; +#ifdef ENABLE_ONEDNN_V3 + auto scale_mem = + memory({{1}, MklDnnType(), memory::format_tag::x}, + this->cpu_engine_, bias_scales.data()); + reorder_net_args.insert( + {DNNL_ARG_ATTR_SCALES | DNNL_ARG_SRC, scale_mem}); +#endif // ENABLE_ONEDNN_V3 + reorder_prim.execute(dnnl::stream(this->cpu_engine_), + reorder_net_args); + } + + *bias_data = temp_scaled_bias_tensor->flat().data(); + + if (is_bias_cache_empty) { + // Only try to cache the bias in the first iteration. + this->CacheBias(ctx, *temp_scaled_bias_tensor, min_input, max_input); + } + } + } + } + + bool IsCachedBiasValid(float current_min_input, + float current_max_input) override + TF_LOCKS_EXCLUDED(this->bias_cache_mutex_) { + tf_shared_lock lock(this->bias_cache_mutex_); + if (this->is_bias_const_ && this->is_weight_const_ && + std::abs(current_min_input - this->saved_min_input_) < 1e-5 && + std::abs(current_max_input - this->saved_max_input_) < 1e-5) { + return true; + } + return false; + } +}; // Register mkl kernels for supported operations and types. 
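 // (For instance, the quantized registration macros at the end of this file
 // expand to a kernel equivalent to REGISTER_QUANTIZED_MATMUL(quint8, qint8,
 // qint32, qint32, float), which the new _QuantizedMatMul tests exercise.)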
-#define REGISTER_FUSEDMATMUL_MKL_SUPPORTED_KERNELS_TYPES(type) \ - REGISTER_KERNEL_BUILDER( \ - Name("_MklFusedMatMul") \ - .Device(DEVICE_CPU) \ - .TypeConstraint("T") \ - .Label(mkl_op_registry::kMklLayoutDependentOpLabel), \ - MklFusedMatMulOp); \ - REGISTER_KERNEL_BUILDER(Name("_MklNativeFusedMatMul") \ - .Device(DEVICE_CPU) \ - .TypeConstraint("T") \ - .Label(mkl_op_registry::kMklNameChangeOpLabel), \ - MklFusedMatMulOp); +#define REGISTER_FUSEDMATMUL_MKL_SUPPORTED_KERNELS_TYPES(type) \ + REGISTER_KERNEL_BUILDER( \ + Name("_MklFusedMatMul") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T") \ + .Label(mkl_op_registry::kMklLayoutDependentOpLabel), \ + MklFusedMatMulOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("_MklNativeFusedMatMul") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T") \ + .Label(mkl_op_registry::kMklNameChangeOpLabel), \ + MklFusedMatMulOp); TF_CALL_float(REGISTER_FUSEDMATMUL_MKL_SUPPORTED_KERNELS_TYPES); TF_CALL_bfloat16(REGISTER_FUSEDMATMUL_MKL_SUPPORTED_KERNELS_TYPES); TF_CALL_half(REGISTER_FUSEDMATMUL_MKL_SUPPORTED_KERNELS_TYPES); #undef REGISTER_FUSEDMATMUL_MKL_SUPPORTED_KERNELS_TYPES +#define REGISTER_QUANTIZED_MATMUL(input_type, weight_type, bias_type, \ + output_type, additional_type) \ + REGISTER_KERNEL_BUILDER( \ + Name("_QuantizedMatMul") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T1") \ + .TypeConstraint("T2") \ + .TypeConstraint("Tbias") \ + .TypeConstraint("Tout") \ + .TypeConstraint("U"), \ + QuantizedFusedMatMulOp); + +#define REGISTER_ALL_OUTPUT_TYPES(input_type, weight_type, bias_type, \ + additional_type) \ + REGISTER_QUANTIZED_MATMUL(input_type, weight_type, bias_type, qint8, \ + additional_type) \ + REGISTER_QUANTIZED_MATMUL(input_type, weight_type, bias_type, quint8, \ + additional_type) \ + REGISTER_QUANTIZED_MATMUL(input_type, weight_type, bias_type, qint32, \ + additional_type) \ + REGISTER_QUANTIZED_MATMUL(input_type, weight_type, bias_type, float, \ + additional_type) \ + REGISTER_QUANTIZED_MATMUL(input_type, weight_type, bias_type, bfloat16, \ + additional_type) + +#define REGISTER_ALL_BIAS_OUTPUT_TYPES(input_type, weight_type, \ + additional_type) \ + REGISTER_ALL_OUTPUT_TYPES(input_type, weight_type, float, additional_type) \ + REGISTER_ALL_OUTPUT_TYPES(input_type, weight_type, bfloat16, \ + additional_type) \ + REGISTER_ALL_OUTPUT_TYPES(input_type, weight_type, qint32, additional_type) + +#define REGISTER_ALL_INPUT_BIAS_OUTPUT_TYPES(weight_type, additional_type) \ + REGISTER_ALL_BIAS_OUTPUT_TYPES(qint8, weight_type, additional_type) \ + REGISTER_ALL_BIAS_OUTPUT_TYPES(quint8, weight_type, additional_type) + +REGISTER_ALL_INPUT_BIAS_OUTPUT_TYPES(qint8, float); +REGISTER_ALL_INPUT_BIAS_OUTPUT_TYPES(qint8, bfloat16); + } // namespace tensorflow #endif // INTEL_MKL diff --git a/tensorflow/core/kernels/mkl/mkl_matmul_ops_common.h b/tensorflow/core/kernels/mkl/mkl_matmul_ops_common.h index 922e6464663bb5..6159b1d047ea65 100644 --- a/tensorflow/core/kernels/mkl/mkl_matmul_ops_common.h +++ b/tensorflow/core/kernels/mkl/mkl_matmul_ops_common.h @@ -21,7 +21,6 @@ limitations under the License. #include #include -#include "unsupported/Eigen/CXX11/Tensor" // from @eigen_archive #include "dnnl.hpp" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_kernel.h" @@ -29,6 +28,7 @@ limitations under the License. 
#include "tensorflow/core/kernels/mkl/mkl_kernel_util.h" #include "tensorflow/core/util/mkl_util.h" #include "tensorflow/core/util/onednn_env_vars.h" +#include "unsupported/Eigen/CXX11/Tensor" // from @eigen_archive #if defined(DNNL_AARCH64_USE_ACL) && defined(ENABLE_ONEDNN_OPENMP) #include "tensorflow/core/platform/mutex.h" #endif @@ -102,8 +102,10 @@ struct MklDnnMatMulFwdParams { struct PostOpParam { string name; std::vector param; + string partial_key = string(""); }; std::vector post_op_params; + string input_quant_mode; MklDnnMatMulFwdParams( memory::dims src_dims, memory::dims weight_dims, memory::dims bias_dims, @@ -244,7 +246,7 @@ class MklDnnMatMulFwdPrimitive : public MklPrimitive { dst_scale_mem(nullptr), #ifndef ENABLE_ONEDNN_V3 fwd_desc(nullptr), -#endif // !ENABLE_ONEDNN_V3 +#endif // ENABLE_ONEDNN_V3 fwd_pd(nullptr), src_md(nullptr), weight_md(nullptr), @@ -276,15 +278,26 @@ class MklDnnMatMulFwdPrimitive : public MklPrimitive { MklDnnType(), matmul_fwd_params.dst_format)); - if (std::is_same::value) { - context_.bias_md.reset(new memory::desc({matmul_fwd_params.bias_dims}, - MklDnnType(), - memory::format_tag::any)); + memory::data_type bias_dt; +#ifndef ENABLE_ONEDNN_V3 + bias_dt = MklDnnType(); +#else + if (std::is_same::value) { + // For QuantizedMatMul, bias needs to be passed to oneDNN as float of + // bfloat16 (even if Tbias is qint32). + if (std::is_same::value && + matmul_fwd_params.input_quant_mode == "SCALED") { + bias_dt = MklDnnType(); + } else { + bias_dt = MklDnnType(); + } } else { - context_.bias_md.reset(new memory::desc({matmul_fwd_params.bias_dims}, - MklDnnType(), - memory::format_tag::any)); + bias_dt = MklDnnType(); } +#endif // !ENABLE_ONEDNN_V3 + context_.bias_md.reset(new memory::desc({matmul_fwd_params.bias_dims}, + bias_dt, memory::format_tag::any)); + // Create an inner-product. 
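+  // (An inner-product primitive computes dst = src * weights^T + bias; the
+  // post-ops configured below are then applied to that result.)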
#ifndef ENABLE_ONEDNN_V3 context_.fwd_desc.reset(new inner_product_forward::desc( @@ -304,60 +317,68 @@ class MklDnnMatMulFwdPrimitive : public MklPrimitive { std::unordered_map is_scale_set; if (!post_op_params.empty()) { for (auto const& post_op_param : post_op_params) { - if (post_op_param.name == "relu" || post_op_param.name == "leakyrelu") { + if (post_op_param.name == "Relu" || post_op_param.name == "LeakyRelu") { DCHECK_EQ(post_op_param.param.size(), 3); float op_scale = post_op_param.param[0]; float op_alpha = post_op_param.param[1]; float op_beta = post_op_param.param[2]; post_ops.APPEND_ELTWISE(op_scale, dnnl::algorithm::eltwise_relu, op_alpha, op_beta); - } else if (post_op_param.name == "relu6") { + } else if (post_op_param.name == "Relu6") { DCHECK_EQ(post_op_param.param.size(), 3); float op_scale = post_op_param.param[0]; float op_alpha = post_op_param.param[1]; float op_beta = post_op_param.param[2]; post_ops.APPEND_ELTWISE_RELU6(op_scale, op_alpha, op_beta); - } else if (post_op_param.name == "elu") { + } else if (post_op_param.name == "Elu") { DCHECK_EQ(post_op_param.param.size(), 3); float op_scale = post_op_param.param[0]; float op_alpha = post_op_param.param[1]; float op_beta = post_op_param.param[2]; post_ops.APPEND_ELTWISE(op_scale, dnnl::algorithm::eltwise_elu, op_alpha, op_beta); - } else if (post_op_param.name == "gelu_approximate") { + } else if (post_op_param.name == "GeluApproximate") { DCHECK_EQ(post_op_param.param.size(), 3); float op_scale = post_op_param.param[0]; float op_alpha = post_op_param.param[1]; float op_beta = post_op_param.param[2]; post_ops.APPEND_ELTWISE(op_scale, dnnl::algorithm::eltwise_gelu_tanh, op_alpha, op_beta); - } else if (post_op_param.name == "gelu_exact") { + } else if (post_op_param.name == "GeluExact") { DCHECK_EQ(post_op_param.param.size(), 3); float op_scale = post_op_param.param[0]; float op_alpha = post_op_param.param[1]; float op_beta = post_op_param.param[2]; post_ops.APPEND_ELTWISE(op_scale, dnnl::algorithm::eltwise_gelu_erf, op_alpha, op_beta); - } else if (post_op_param.name == "tanh") { + } else if (post_op_param.name == "Tanh") { DCHECK_EQ(post_op_param.param.size(), 3); float op_scale = post_op_param.param[0]; float op_alpha = post_op_param.param[1]; float op_beta = post_op_param.param[2]; post_ops.APPEND_ELTWISE(op_scale, dnnl::algorithm::eltwise_tanh, op_alpha, op_beta); - } else if (post_op_param.name == "logistic") { + } else if (post_op_param.name == "Sigmoid") { DCHECK_EQ(post_op_param.param.size(), 3); float op_scale = post_op_param.param[0]; float op_alpha = post_op_param.param[1]; float op_beta = post_op_param.param[2]; post_ops.APPEND_ELTWISE(op_scale, dnnl::algorithm::eltwise_logistic, op_alpha, op_beta); + } else if (post_op_param.name == "linear") { + DCHECK_EQ(post_op_param.param.size(), 3); + float op_scale = post_op_param.param[0]; + float op_alpha = post_op_param.param[1]; + float op_beta = post_op_param.param[2]; + post_ops.APPEND_ELTWISE(op_scale, dnnl::algorithm::eltwise_linear, + op_alpha, op_beta); #ifndef ENABLE_ONEDNN_V3 } else if (post_op_param.name == "output_scale") { - DCHECK_EQ(post_op_param.param.size(), 1); - std::vector scales; - scales.push_back(post_op_param.param[0]); - post_ops_attr.set_output_scales(0, scales); + if (post_op_param.param.size() == 1) { + post_ops_attr.set_output_scales(0, post_op_param.param); + } else { + post_ops_attr.set_output_scales(2, post_op_param.param); + } #else } else if (post_op_param.name == "src_scale") { is_scale_set.insert({"src", true}); @@ -368,14 
+389,18 @@ class MklDnnMatMulFwdPrimitive : public MklPrimitive { new memory(*context_.src_scale_md, cpu_engine_, DummyData)); } else if (post_op_param.name == "wei_scale") { is_scale_set.insert({"wei", true}); - post_ops_attr.set_scales_mask(DNNL_ARG_WEIGHTS, 0); - context_.wei_scale_md.reset(new memory::desc({1}, MklDnnType(), - memory::format_tag::x)); + const int scale_size = post_op_param.param.size(); + const int mask = scale_size == 1 ? 0 : 1; + post_ops_attr.set_scales_mask(DNNL_ARG_WEIGHTS, mask); + context_.wei_scale_md.reset(new memory::desc( + {scale_size}, MklDnnType(), memory::format_tag::x)); context_.wei_scale_mem.reset( new memory(*context_.wei_scale_md, cpu_engine_, DummyData)); } else if (post_op_param.name == "dst_scale") { is_scale_set.insert({"dst", true}); - post_ops_attr.set_scales_mask(DNNL_ARG_DST, 0); + const int scale_size = post_op_param.param.size(); + const int mask = scale_size == 1 ? 0 : 1; + post_ops_attr.set_scales_mask(DNNL_ARG_DST, mask); context_.dst_scale_md.reset(new memory::desc({1}, MklDnnType(), memory::format_tag::x)); context_.dst_scale_mem.reset( @@ -387,13 +412,15 @@ class MklDnnMatMulFwdPrimitive : public MklPrimitive { post_ops.append_sum(op_scale); } else { - DCHECK((post_op_param.name == "relu") || - (post_op_param.name == "relu6") || - (post_op_param.name == "elu") || - (post_op_param.name == "tanh") || - (post_op_param.name == "logistic") || + DCHECK((post_op_param.name == "Relu") || + (post_op_param.name == "Relu6") || + (post_op_param.name == "Elu") || + (post_op_param.name == "GeluApproximate") || + (post_op_param.name == "GeluExact") || + (post_op_param.name == "Tanh") || + (post_op_param.name == "Sigmoid") || (post_op_param.name == "sum") || - (post_op_param.name == "leakyrelu") || OUTPUT_SCALE_DCHECK); + (post_op_param.name == "Leakyrelu") || OUTPUT_SCALE_DCHECK); } } post_ops_attr.set_post_ops(post_ops); @@ -433,11 +460,15 @@ class MklDnnMatMulFwdPrimitive : public MklPrimitive { {DNNL_ARG_SCRATCHPAD, *context_.sp_mem}, {DNNL_ARG_DST, *context_.dst_mem}}; #ifdef ENABLE_ONEDNN_V3 - if (is_scale_set["src"] && is_scale_set["wei"] && is_scale_set["dst"]) { + if (is_scale_set["src"]) { net_args.insert( {DNNL_ARG_ATTR_SCALES | DNNL_ARG_SRC, *context_.src_scale_mem}); + } + if (is_scale_set["wei"]) { net_args.insert( {DNNL_ARG_ATTR_SCALES | DNNL_ARG_WEIGHTS, *context_.wei_scale_mem}); + } + if (is_scale_set["dst"]) { net_args.insert( {DNNL_ARG_ATTR_SCALES | DNNL_ARG_DST, *context_.dst_scale_mem}); } @@ -510,12 +541,12 @@ class MklDnnMatMulFwdPrimitiveFactory : public MklPrimitiveFactory { // Generate keys for post-ops for (auto const& post_op_param : mkldnn_matmul_fwd_dims.post_op_params) { - if (post_op_param.name == "relu" || post_op_param.name == "relu6" || - post_op_param.name == "elu" || post_op_param.name == "tanh" || - post_op_param.name == "logistic" || - post_op_param.name == "leakyrelu" || - post_op_param.name == "gelu_approximate" || - post_op_param.name == "gelu_exact") { + if (post_op_param.name == "Relu" || post_op_param.name == "Relu6" || + post_op_param.name == "Elu" || post_op_param.name == "Tanh" || + post_op_param.name == "Sigmoid" || + post_op_param.name == "LeakyRelu" || + post_op_param.name == "GeluApproximate" || + post_op_param.name == "GeluExact" || post_op_param.name == "linear") { DCHECK_EQ(post_op_param.param.size(), 3); key_creator.AddAsKey(post_op_param.name); key_creator.AddAsKey(post_op_param.param[0]); @@ -532,9 +563,16 @@ class MklDnnMatMulFwdPrimitiveFactory : public MklPrimitiveFactory { 
post_op_param.name == "wei_scale" || post_op_param.name == "dst_scale") { #endif // !ENABLE_ONEDNN_V3 - DCHECK_EQ(post_op_param.param.size(), 1); key_creator.AddAsKey(post_op_param.name); - key_creator.AddAsKey(post_op_param.param[0]); + if (post_op_param.partial_key.empty()) { + DCHECK_GE(post_op_param.param.size(), 1); + // Old Quantized MatMul kernels do not create part of key beforehand + // as primitive caching-key-creation optimization. + key_creator.AddAsKey(post_op_param.param[0]); + } else { + // New Quantized MatMul kernels pre-create partial key. + key_creator.AddAsKey(post_op_param.partial_key); + } } else { return string("not_a_key"); } diff --git a/tensorflow/core/kernels/mkl/mkl_qmatmul_op.cc b/tensorflow/core/kernels/mkl/mkl_qmatmul_op.cc index efb33375d1669d..92a21e1255c778 100644 --- a/tensorflow/core/kernels/mkl/mkl_qmatmul_op.cc +++ b/tensorflow/core/kernels/mkl/mkl_qmatmul_op.cc @@ -783,7 +783,7 @@ class MklDnnQuantizedMatMulReluOp MklDnnQuantizedMatMulOp::ExtendMklDnnMatMulFwdParams(context, params); - params.post_op_params.push_back({"relu", {1.0, 0.0, 0.0}}); + params.post_op_params.push_back({"Relu", {1.0, 0.0, 0.0}}); } }; diff --git a/tensorflow/core/kernels/mkl/mkl_qmatmul_op_test.cc b/tensorflow/core/kernels/mkl/mkl_qmatmul_op_test.cc index 22b56e19e3bb63..3d862e0b8e8fd5 100644 --- a/tensorflow/core/kernels/mkl/mkl_qmatmul_op_test.cc +++ b/tensorflow/core/kernels/mkl/mkl_qmatmul_op_test.cc @@ -35,23 +35,43 @@ limitations under the License. namespace tensorflow { -class QuantizedMatMulTest : public OpsTestBase {}; +class QuantizedMatMulTest : public OpsTestBase, + public ::testing::WithParamInterface {}; // Two small matrices A of type uint8 and B of type int8 are multiplied // and the result is added with int32 bias -TEST_F(QuantizedMatMulTest, Small_withBias) { - TF_ASSERT_OK( - NodeDefBuilder("quantized_mat_mul_op", "_MklQuantizedMatMulWithBias") - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_QINT8)) - .Input(FakeInput(DT_QINT32)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Attr("Toutput", DataTypeToEnum::v()) - .Attr("_kernel", "QuantizedMklOp") - .Finalize(node_def())); +TEST_P(QuantizedMatMulTest, Small_withBias) { + const bool is_old_api = GetParam(); + if (is_old_api) { + TF_ASSERT_OK( + NodeDefBuilder("quantized_mat_mul_op", "_MklQuantizedMatMulWithBias") + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_QINT8)) + .Input(FakeInput(DT_QINT32)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("Toutput", DataTypeToEnum::v()) + .Attr("_kernel", "QuantizedMklOp") + .Finalize(node_def())); + } else { + TF_ASSERT_OK( + NodeDefBuilder("quantized_mat_mul_op", "_QuantizedMatMul") + .Attr("Thost_inputs", {DT_QUINT8, DT_QINT8, DT_QINT32, DT_FLOAT, + DT_FLOAT, DT_FLOAT, DT_FLOAT}) + .Attr("Thost_outputs", {DT_QINT32, DT_FLOAT, DT_FLOAT}) + .Attr("Tdevice_inputs", std::vector()) + .Attr("Tdevice_outputs", std::vector()) + .Attr("T1", DT_QUINT8) + .Attr("T2", DT_QINT8) + .Attr("Tbias", DT_QINT32) + .Attr("Tout", DT_QINT32) + .Attr("fused_ops", {"BiasAdd"}) + .Input(FakeInput()) + .Input(FakeInput()) + .Finalize(node_def())); + } TF_ASSERT_OK(InitOp()); // A matrix is: // | 1 | 2 | 3 | @@ -91,19 +111,38 @@ TEST_F(QuantizedMatMulTest, Small_withBias) { // Two small matrices A of type uint8 and B of type int8 are multiplied // and the result is added with neg bias as well -TEST_F(QuantizedMatMulTest, 
Small_withNegBias) { - TF_ASSERT_OK( - NodeDefBuilder("quantized_mat_mul_op", "_MklQuantizedMatMulWithBias") - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_QINT8)) - .Input(FakeInput(DT_QINT32)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Attr("Toutput", DataTypeToEnum::v()) - .Attr("_kernel", "QuantizedMklOp") - .Finalize(node_def())); +TEST_P(QuantizedMatMulTest, Small_withNegBias) { + const bool is_old_api = GetParam(); + if (is_old_api) { + TF_ASSERT_OK( + NodeDefBuilder("quantized_mat_mul_op", "_MklQuantizedMatMulWithBias") + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_QINT8)) + .Input(FakeInput(DT_QINT32)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("Toutput", DataTypeToEnum::v()) + .Attr("_kernel", "QuantizedMklOp") + .Finalize(node_def())); + } else { + TF_ASSERT_OK( + NodeDefBuilder("quantized_mat_mul_op", "_QuantizedMatMul") + .Attr("Thost_inputs", {DT_QUINT8, DT_QINT8, DT_QINT32, DT_FLOAT, + DT_FLOAT, DT_FLOAT, DT_FLOAT}) + .Attr("Thost_outputs", {DT_QINT32, DT_FLOAT, DT_FLOAT}) + .Attr("Tdevice_inputs", std::vector()) + .Attr("Tdevice_outputs", std::vector()) + .Attr("T1", DT_QUINT8) + .Attr("T2", DT_QINT8) + .Attr("Tbias", DT_QINT32) + .Attr("Tout", DT_QINT32) + .Attr("fused_ops", {"BiasAdd"}) + .Input(FakeInput()) + .Input(FakeInput()) + .Finalize(node_def())); + } TF_ASSERT_OK(InitOp()); // A matrix is: // | 1 | 2 | 3 | @@ -144,20 +183,40 @@ TEST_F(QuantizedMatMulTest, Small_withNegBias) { // Two small matrices A of type uint8 (converted from signed integer) // and B of type int8 are multiplied and the result is added with float bias -TEST_F(QuantizedMatMulTest, Small_WithNegInp) { - TF_ASSERT_OK( - NodeDefBuilder("quantized_mat_mul_op", "_MklQuantizedMatMulWithBias") - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_QINT8)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Attr("Toutput", DataTypeToEnum::v()) - .Attr("input_quant_mode", "MIN_FIRST") - .Attr("_kernel", "QuantizedMklOp") - .Finalize(node_def())); +TEST_P(QuantizedMatMulTest, Small_WithNegInp) { + const bool is_old_api = GetParam(); + if (is_old_api) { + TF_ASSERT_OK( + NodeDefBuilder("quantized_mat_mul_op", "_MklQuantizedMatMulWithBias") + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_QINT8)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("Toutput", DataTypeToEnum::v()) + .Attr("input_quant_mode", "MIN_FIRST") + .Attr("_kernel", "QuantizedMklOp") + .Finalize(node_def())); + } else { + TF_ASSERT_OK( + NodeDefBuilder("quantized_mat_mul_op", "_QuantizedMatMul") + .Attr("Thost_inputs", {DT_QUINT8, DT_QINT8, DT_FLOAT, DT_FLOAT, + DT_FLOAT, DT_FLOAT, DT_FLOAT}) + .Attr("Thost_outputs", {DT_QINT32, DT_FLOAT, DT_FLOAT}) + .Attr("Tdevice_inputs", std::vector()) + .Attr("Tdevice_outputs", std::vector()) + .Attr("T1", DT_QUINT8) + .Attr("T2", DT_QINT8) + .Attr("Tbias", DT_FLOAT) + .Attr("Tout", DT_QINT32) + .Attr("fused_ops", {"BiasAdd"}) + .Attr("input_quant_mode", "MIN_FIRST") + .Input(FakeInput()) + .Input(FakeInput()) + .Finalize(node_def())); + } TF_ASSERT_OK(InitOp()); // The A matrix is: // | -1 | -5 | -9 | @@ -213,21 +272,41 @@ TEST_F(QuantizedMatMulTest, Small_WithNegInp) { // Two small matrices A of type uint8 and B of 
type int8 are multiplied // and the result is added with int32 bias and Requantization fusion -TEST_F(QuantizedMatMulTest, Small_withBiasAndReq) { - TF_ASSERT_OK(NodeDefBuilder("quantized_mat_mul_op", - "_MklQuantizedMatMulWithBiasAndRequantize") - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_QINT8)) - .Input(FakeInput(DT_QINT32)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Attr("Toutput", DataTypeToEnum::v()) - .Attr("_kernel", "QuantizedMklOp") - .Finalize(node_def())); +TEST_P(QuantizedMatMulTest, Small_withBiasAndReq) { + const bool is_old_api = GetParam(); + if (is_old_api) { + TF_ASSERT_OK(NodeDefBuilder("quantized_mat_mul_op", + "_MklQuantizedMatMulWithBiasAndRequantize") + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_QINT8)) + .Input(FakeInput(DT_QINT32)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("Toutput", DataTypeToEnum::v()) + .Attr("_kernel", "QuantizedMklOp") + .Finalize(node_def())); + } else { + TF_ASSERT_OK(NodeDefBuilder("quantized_mat_mul_op", "_QuantizedMatMul") + .Attr("Thost_inputs", + {DT_QUINT8, DT_QINT8, DT_QINT32, DT_FLOAT, DT_FLOAT, + DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT}) + .Attr("Thost_outputs", {DT_QUINT8, DT_FLOAT, DT_FLOAT}) + .Attr("Tdevice_inputs", std::vector()) + .Attr("Tdevice_outputs", std::vector()) + .Attr("T1", DT_QUINT8) + .Attr("T2", DT_QINT8) + .Attr("Tbias", DT_QINT32) + .Attr("Tout", DT_QUINT8) + .Attr("fused_ops", {"BiasAdd", "Requantize"}) + .Input(FakeInput()) + .Input(FakeInput()) + .Finalize(node_def())); + } + TF_ASSERT_OK(InitOp()); // A matrix is: // | 1 | 2 | 3 | @@ -273,11 +352,25 @@ TEST_F(QuantizedMatMulTest, Small_withBiasAndReq) { // 178 * 1.00392 ~= 178.698 ~= 179 Tensor expected(allocator(), DT_QUINT8, TensorShape({2, 4})); + if (is_old_api) { #ifdef ENABLE_ONEDNN_V3 - test::FillValues(&expected, {84, 60, 116, 52, 183, 168, 233, 178}); + test::FillValues(&expected, {84, 60, 116, 52, 183, 168, 233, 178}); #else - test::FillValues(&expected, {84, 60, 116, 52, 184, 169, 234, 179}); + test::FillValues(&expected, {84, 60, 116, 52, 184, 169, 234, 179}); #endif // ENABLE_ONEDNN_V3 + } else { + // New api uses more numerical precision preserving equation. Old api scales + // up to 32-bit and then scales down from 32-bit to 8-bit. New api instead + // does a dequantization followed by a scaling to 8-bit. + // In this test, + // input deq. scale = ((255.0 * 127.0) / (255.0 * 127.0)) = 1.0 + // output req. scale = 255.0 / 255.0 = 1.0 + // combined scale = 1.0 * 1.0 = 1.0 + // Note: new api scale value is 1.0, whereas the old api scale is 1.000392. + // Correct value is 1.0f. The closer it is to the correct value the better + // the formula is. 
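+    // E.g. the last element: the raw sum 178 stays 178 under the new
+    // combined scale of 1.0, where the old API's ~1.00392 scale (see the
+    // hand calculation above) rounded it up to 179.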
+ test::FillValues(&expected, {84, 60, 116, 52, 183, 168, 233, 178}); + } const Tensor& output = *GetOutput(0); test::ExpectTensorEqual(expected, output); @@ -285,21 +378,40 @@ TEST_F(QuantizedMatMulTest, Small_withBiasAndReq) { // Two small matrices A of type uint8 and B of type int8 are multiplied // and the result is added with int32 bias and Requantization fusion -TEST_F(QuantizedMatMulTest, Small_withBiasAndDeq) { - TF_ASSERT_OK(NodeDefBuilder("quantized_mat_mul_op", - "_MklQuantizedMatMulWithBiasAndDequantize") - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_QINT8)) - .Input(FakeInput(DT_QINT32)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Attr("Toutput", DataTypeToEnum::v()) - .Attr("_kernel", "QuantizedMklOp") - .Finalize(node_def())); +TEST_P(QuantizedMatMulTest, Small_withBiasAndDeq) { + const bool is_old_api = GetParam(); + if (is_old_api) { + TF_ASSERT_OK(NodeDefBuilder("quantized_mat_mul_op", + "_MklQuantizedMatMulWithBiasAndDequantize") + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_QINT8)) + .Input(FakeInput(DT_QINT32)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("Toutput", DataTypeToEnum::v()) + .Attr("_kernel", "QuantizedMklOp") + .Finalize(node_def())); + } else { + TF_ASSERT_OK( + NodeDefBuilder("quantized_mat_mul_op", "_QuantizedMatMul") + .Attr("Thost_inputs", {DT_QUINT8, DT_QINT8, DT_QINT32, DT_FLOAT, + DT_FLOAT, DT_FLOAT, DT_FLOAT}) + .Attr("Thost_outputs", {DT_FLOAT}) + .Attr("Tdevice_inputs", std::vector()) + .Attr("Tdevice_outputs", std::vector()) + .Attr("T1", DT_QUINT8) + .Attr("T2", DT_QINT8) + .Attr("Tbias", DT_QINT32) + .Attr("Tout", DT_FLOAT) + .Attr("fused_ops", {"BiasAdd", "Dequantize"}) + .Input(FakeInput()) + .Input(FakeInput()) + .Finalize(node_def())); + } TF_ASSERT_OK(InitOp()); // A matrix is: // | 1 | 2 | 3 | @@ -316,9 +428,11 @@ TEST_F(QuantizedMatMulTest, Small_withBiasAndDeq) { AddInputFromArray(TensorShape({}), {255.0f}); AddInputFromArray(TensorShape({}), {-127.0f}); AddInputFromArray(TensorShape({}), {127.0f}); - AddInputFromArray(TensorShape({}), {0}); - AddInputFromArray(TensorShape({}), {255.0f}); + if (is_old_api) { + AddInputFromArray(TensorShape({}), {0}); + AddInputFromArray(TensorShape({}), {255.0f}); + } TF_ASSERT_OK(RunOpKernel()); // Here are the results we expect, from hand calculations: // (1 * 7) + (2 * 11) + (3 * 15) = 74 @@ -353,19 +467,38 @@ TEST_F(QuantizedMatMulTest, Small_withBiasAndDeq) { // Two small matrices A of type uint8 and B of type int8 are multiplied // and the result is added with float bias and then performed relu on the result -TEST_F(QuantizedMatMulTest, Small_withBiasAndRelu) { - TF_ASSERT_OK(NodeDefBuilder("quantized_mat_mul_op", - "_MklQuantizedMatMulWithBiasAndRelu") - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_QINT8)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Attr("Toutput", DataTypeToEnum::v()) - .Attr("_kernel", "QuantizedMklOp") - .Finalize(node_def())); +TEST_P(QuantizedMatMulTest, Small_withBiasAndRelu) { + const bool is_old_api = GetParam(); + if (is_old_api) { + TF_ASSERT_OK(NodeDefBuilder("quantized_mat_mul_op", + "_MklQuantizedMatMulWithBiasAndRelu") + .Input(FakeInput(DT_QUINT8)) + 
.Input(FakeInput(DT_QINT8)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("Toutput", DataTypeToEnum::v()) + .Attr("_kernel", "QuantizedMklOp") + .Finalize(node_def())); + } else { + TF_ASSERT_OK( + NodeDefBuilder("quantized_mat_mul_op", "_QuantizedMatMul") + .Attr("Thost_inputs", {DT_QUINT8, DT_QINT8, DT_FLOAT, DT_FLOAT, + DT_FLOAT, DT_FLOAT, DT_FLOAT}) + .Attr("Thost_outputs", {DT_QINT32, DT_FLOAT, DT_FLOAT}) + .Attr("Tdevice_inputs", std::vector()) + .Attr("Tdevice_outputs", std::vector()) + .Attr("T1", DT_QUINT8) + .Attr("T2", DT_QINT8) + .Attr("Tbias", DT_FLOAT) + .Attr("Tout", DT_QINT32) + .Attr("fused_ops", {"BiasAdd", "Relu"}) + .Input(FakeInput()) + .Input(FakeInput()) + .Finalize(node_def())); + } TF_ASSERT_OK(InitOp()); // A matrix is: // | 1 | 2 | 3 | @@ -408,21 +541,41 @@ TEST_F(QuantizedMatMulTest, Small_withBiasAndRelu) { // Simple test for Matrix multiplication with Bias, Relu and // Requantization fusion -TEST_F(QuantizedMatMulTest, Small_withBiasAndReluAndReq) { - TF_ASSERT_OK(NodeDefBuilder("quantized_mat_mul_op", - "_MklQuantizedMatMulWithBiasAndReluAndRequantize") - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_QINT8)) - .Input(FakeInput(DT_QINT32)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Attr("Toutput", DataTypeToEnum::v()) - .Attr("_kernel", "QuantizedMklOp") - .Finalize(node_def())); +TEST_P(QuantizedMatMulTest, Small_withBiasAndReluAndReq) { + const bool is_old_api = GetParam(); + if (is_old_api) { + TF_ASSERT_OK( + NodeDefBuilder("quantized_mat_mul_op", + "_MklQuantizedMatMulWithBiasAndReluAndRequantize") + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_QINT8)) + .Input(FakeInput(DT_QINT32)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("Toutput", DataTypeToEnum::v()) + .Attr("_kernel", "QuantizedMklOp") + .Finalize(node_def())); + } else { + TF_ASSERT_OK(NodeDefBuilder("quantized_mat_mul_op", "_QuantizedMatMul") + .Attr("Thost_inputs", + {DT_QUINT8, DT_QINT8, DT_QINT32, DT_FLOAT, DT_FLOAT, + DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT}) + .Attr("Thost_outputs", {DT_QUINT8, DT_FLOAT, DT_FLOAT}) + .Attr("Tdevice_inputs", std::vector()) + .Attr("Tdevice_outputs", std::vector()) + .Attr("T1", DT_QUINT8) + .Attr("T2", DT_QINT8) + .Attr("Tbias", DT_QINT32) + .Attr("Tout", DT_QUINT8) + .Attr("fused_ops", {"BiasAdd", "Relu", "Requantize"}) + .Input(FakeInput()) + .Input(FakeInput()) + .Finalize(node_def())); + } TF_ASSERT_OK(InitOp()); // A matrix is: // | 1 | 2 | 3 | @@ -470,11 +623,25 @@ TEST_F(QuantizedMatMulTest, Small_withBiasAndReluAndReq) { // 178 * 1.00392 ~= 178.698 ~= 179 Tensor expected(allocator(), DT_QUINT8, TensorShape({2, 4})); + if (is_old_api) { #ifdef ENABLE_ONEDNN_V3 - test::FillValues(&expected, {84, 60, 116, 52, 183, 168, 233, 178}); + test::FillValues(&expected, {84, 60, 116, 52, 183, 168, 233, 178}); #else - test::FillValues(&expected, {84, 60, 116, 52, 184, 169, 234, 179}); + test::FillValues(&expected, {84, 60, 116, 52, 184, 169, 234, 179}); #endif // ENABLE_ONEDNN_V3 + } else { + // New api uses more numerical precision preserving equation. Old api scales + // up to 32-bit and then scales down from 32-bit to 8-bit. 
New api instead + // does a dequantization followed by a scaling to 8-bit. + // In this test, + // input deq. scale = ((255.0 * 127.0) / (255.0 * 127.0)) = 1.0 + // output req. scale = 255.0 / 255.0 = 1.0 + // combined scale = 1.0 * 1.0 = 1.0 + // Note: new api scale value is 1.0, whereas the old api scale is 1.000392. + // Correct value is 1.0f. The closer it is to the correct value the better + // the formula is. + test::FillValues(&expected, {84, 60, 116, 52, 183, 168, 233, 178}); + } const Tensor& output = *GetOutput(0); test::ExpectTensorEqual(expected, output); @@ -484,19 +651,38 @@ TEST_F(QuantizedMatMulTest, Small_withBiasAndReluAndReq) { // and the result is added with int32 bias // For the first time B matrix will be reordered and cached which will be // used for subsequent runs -TEST_F(QuantizedMatMulTest, Small_withWeightCached) { - TF_ASSERT_OK( - NodeDefBuilder("quantized_mat_mul_op", "_MklQuantizedMatMulWithBias") - .Input(FakeInput(DT_QUINT8)) - .Input(FakeInput(DT_QINT8)) - .Input(FakeInput(DT_QINT32)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Input(FakeInput(DT_FLOAT)) - .Attr("Toutput", DataTypeToEnum::v()) - .Attr("_kernel", "QuantizedMklOp") - .Finalize(node_def())); +TEST_P(QuantizedMatMulTest, Small_withWeightCached) { + const bool is_old_api = GetParam(); + if (is_old_api) { + TF_ASSERT_OK( + NodeDefBuilder("quantized_mat_mul_op", "_MklQuantizedMatMulWithBias") + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_QINT8)) + .Input(FakeInput(DT_QINT32)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("Toutput", DataTypeToEnum::v()) + .Attr("_kernel", "QuantizedMklOp") + .Finalize(node_def())); + } else { + TF_ASSERT_OK( + NodeDefBuilder("quantized_mat_mul_op", "_QuantizedMatMul") + .Attr("Thost_inputs", {DT_QUINT8, DT_QINT8, DT_QINT32, DT_FLOAT, + DT_FLOAT, DT_FLOAT, DT_FLOAT}) + .Attr("Thost_outputs", {DT_QINT32, DT_FLOAT, DT_FLOAT}) + .Attr("Tdevice_inputs", std::vector()) + .Attr("Tdevice_outputs", std::vector()) + .Attr("T1", DT_QUINT8) + .Attr("T2", DT_QINT8) + .Attr("Tbias", DT_QINT32) + .Attr("Tout", DT_QINT32) + .Attr("fused_ops", {"BiasAdd"}) + .Input(FakeInput()) + .Input(FakeInput()) + .Finalize(node_def())); + } TF_ASSERT_OK(InitOp()); // The tensor shape of (1,3) is selected to allow the oneDNN expected // weight format to be made as OI rather than IO for BS > 1 @@ -549,6 +735,9 @@ TEST_F(QuantizedMatMulTest, Small_withWeightCached) { test::ExpectTensorEqual(expected, output_new); } +INSTANTIATE_TEST_SUITE_P(All, QuantizedMatMulTest, + ::testing::Values(true, false)); + } // namespace tensorflow #endif // INTEL_MKL diff --git a/tensorflow/core/kernels/mkl/onednn_fused_matmul_ops_test.cc b/tensorflow/core/kernels/mkl/onednn_fused_matmul_ops_test.cc new file mode 100644 index 00000000000000..af1d056ca7530a --- /dev/null +++ b/tensorflow/core/kernels/mkl/onednn_fused_matmul_ops_test.cc @@ -0,0 +1,749 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#if defined(INTEL_MKL)
+
+#define EIGEN_USE_THREADS
+
+#include
+#include
+
+#include "absl/algorithm/container.h"
+#include "absl/strings/match.h"
+#include "gtest/gtest.h"
+#include "tensorflow/cc/ops/array_ops.h"
+#include "tensorflow/cc/ops/const_op.h"
+#include "tensorflow/cc/ops/math_ops.h"
+#include "tensorflow/cc/ops/nn_ops.h"
+#include "tensorflow/cc/ops/nn_ops_internal.h"
+#include "tensorflow/cc/ops/standard_ops.h"
+#include "tensorflow/core/framework/fake_input.h"
+#include "tensorflow/core/framework/node_def_builder.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/types.pb.h"
+#include "tensorflow/core/graph/node_builder.h"
+#include "tensorflow/core/kernels/mkl/mkl_kernel_util.h"
+#include "tensorflow/core/kernels/ops_testutil.h"
+#include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/kernels/quantization_utils.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/protobuf/rewriter_config.pb.h"
+#include "tensorflow/core/public/session.h"
+#include "tensorflow/core/util/util.h"
+#include "unsupported/Eigen/CXX11/Tensor"
+
+namespace tensorflow {
+
+// The test suite contains different categories of tests.
+// (1) Realnumber (float/bfloat16): The output of _FusedMatMul should be
+// close enough to the final output of the sequence of unfused operations.
+// Only Gelu fusion is included here. All other fusion tests can be found in
+// tensorflow/core/kernels/mkl/mkl_fused_ops_test.cc
+//
+// (2) Quantized: Possible fusions are done in the _QuantizedMatMul op. The
+// output of
+//     quantize --> quantized_op --> dequantize, or
+//     quantize --> quantized_op --> requantize --> dequantize
+// should be close (with a higher tolerance) to the final output of the
+// sequence of unfused real number type operations. For the quantized
+// scenario, it is assumed that the first matrix of the MatMul op represents
+// the features, while the second matrix represents the weight parameters.
+// The feature matrix can be quantized with MIN_FIRST (to QUINT8) or SCALED
+// (to QINT8) mode and is always quantized per-tensor. The weights can be
+// quantized per-tensor or per-channel, but only with SCALED mode to QINT8.
+
+// T: float or bfloat16 used as the tensor type of the MatMul and fusion
+// operations.
+template <typename T>
+class FusedMatMulOpsTest : public OpsTestBase {
+ private:
+  float leakyrelu_alpha_ = 0.2f;
+
+ protected:
+  struct FusedOpsAndDims {
+    // List of fusions.
+    std::vector<string> fused_ops;
+    // Tensor dimensions associated with the fusions. It is assumed here that
+    // each fusion requires no more than one additional tensor. If a fusion
+    // does not require a tensor, e.g., Relu, the tensor dimensions will be
+    // {0}, implying an empty tensor.
+    std::vector<std::vector<int64_t>> fusion_dims;
+  };
+
+  struct FusedOpsAndTensors {
+    // List of fusions.
+    std::vector<string> fused_ops;
+    // Tensors associated with the fusions. It is assumed here that each fusion
+    // requires no more than one additional tensor. If a fusion does not
+    // require a tensor, e.g., Relu, the tensor will be an empty tensor.
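+    // For example (an illustrative sketch, not a test fixture): fused_ops
+    // {"BiasAdd", "Relu"} would pair with fusion_tensors
+    // {bias_tensor, Tensor()}, where the Relu entry stays empty.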
+    std::vector<Tensor> fusion_tensors;
+  };
+
+  using GraphRunner =
+      std::function<void(const Tensor& x, const Tensor& y,
+                         const FusedOpsAndTensors& fused_ops_and_tensors,
+                         Tensor* result, bool transpose_x, bool transpose_y)>;
+
+  using QuantizedGraphRunner = std::function<void(
+      const Tensor& x, const Tensor& y,
+      const FusedOpsAndTensors& fused_ops_and_tensors, Tensor* result,
+      bool transpose_x, bool transpose_y, string input_quant_mode,
+      string output_quant_mode, bool is_bias_quantized, bool is_perchannel,
+      bool requantize, float output_min, float output_max)>;
+
+  bool HasQuantizationSupport() {
+    return TestCPUFeature(tensorflow::port::CPUFeature::AVX_VNNI_INT8) ||
+           TestCPUFeature(tensorflow::port::CPUFeature::AVX512_VNNI) ||
+           TestCPUFeature(port::CPUFeature::AMX_INT8);
+  }
+
+  // Runs a TensorFlow graph defined by the root scope and fetches the results
+  // of the 'fetch' nodes into `outputs`. The optional `add_nodes` parameter
+  // allows defining nodes directly with NodeDefBuilder.
+  void RunAndFetch(const tensorflow::Scope& root,
+                   const std::vector<string>& fetch,
+                   std::vector<Tensor>* outputs,
+                   const std::vector<const NodeDef*> add_nodes = {}) {
+    tensorflow::GraphDef graph;
+    TF_ASSERT_OK(root.ToGraphDef(&graph));
+
+    for (const NodeDef* add_node : add_nodes) {
+      *graph.add_node() = *add_node;
+    }
+
+    // We really want to make sure that the graph executes exactly as we
+    // passed it to the session, so we disable various optimizations.
+    tensorflow::SessionOptions session_options;
+
+    // Disable common runtime constant folding.
+    session_options.config.mutable_graph_options()
+        ->mutable_optimizer_options()
+        ->set_opt_level(OptimizerOptions::L0);
+
+    // Disable Grappler optimizations for tests.
+    tensorflow::RewriterConfig* cfg =
+        session_options.config.mutable_graph_options()
+            ->mutable_rewrite_options();
+    cfg->set_constant_folding(tensorflow::RewriterConfig::OFF);
+    cfg->set_layout_optimizer(tensorflow::RewriterConfig::OFF);
+    cfg->set_remapping(tensorflow::RewriterConfig::OFF);
+
+    std::unique_ptr<tensorflow::Session> session(
+        tensorflow::NewSession(session_options));
+
+    const string device = "/device:CPU:0";
+    for (NodeDef& mutable_node : *graph.mutable_node()) {
+      mutable_node.set_device(device);
+    }
+
+    TF_ASSERT_OK(session->Create(graph));
+    TF_ASSERT_OK(session->Run({}, fetch, {}, outputs));
+  }
+
+  Output ActivationOp(Scope& root, string op, Output x, string name) {
+    // TODO(intel-tf): Add GeluExact (Erf op based) when the Erf op is enabled
+    // for bfloat16.
GeluExact with float32 precision test can be found in + // tensorflow/python/grappler/remapper_test.py + if (op == "Relu") { + return ops::Relu(root.WithOpName(name), x); + } else if (op == "Relu6") { + return ops::Relu6(root.WithOpName(name), x); + } else if (op == "LeakyRelu") { + return ops::internal::LeakyRelu( + root.WithOpName(name), x, + ops::internal::LeakyRelu::Attrs().Alpha(this->leakyrelu_alpha_)); + } else if (op == "Elu") { + return ops::Elu(root.WithOpName(name), x); + } else if (op == "Tanh") { + return ops::Tanh(root.WithOpName(name), x); + } else if (op == "Sigmoid") { + return ops::Sigmoid(root.WithOpName(name), x); + } else if (op == "GeluApproximate") { + Output three = ops::Const(root.WithOpName("gelu_three"), 3.0f); + Output empirical = + ops::Const(root.WithOpName("gelu_empirical"), 0.044715f); + Output square_root_two_over_pi = ops::Const( + root.WithOpName("gelu_square_root_two_over_pi"), 0.7978845608028654f); + Output one = ops::Const(root.WithOpName("gelu_one"), 1.0f); + Output half = ops::Const(root.WithOpName("gelu_half"), 0.5f); + Output pow = ops::Pow(root.WithOpName("gelu_pow"), x, three); + Output mul1 = ops::Multiply(root.WithOpName("gelu_mul1"), empirical, pow); + Output add1 = ops::AddV2(root.WithOpName("gelu_add1"), x, mul1); + Output mul2 = ops::Multiply(root.WithOpName("gelu_mul2"), + square_root_two_over_pi, add1); + Output tanh = ops::Tanh(root.WithOpName("gelu_tanh"), mul2); + Output add3 = ops::AddV2(root.WithOpName("gelu_add3"), one, tanh); + Output mul3 = ops::Multiply(root.WithOpName("gelu_mul3"), half, x); + return ops::Multiply(root.WithOpName(name), mul3, add3); + } else { + EXPECT_TRUE(false) << absl::StrCat("The activation: ", op, + " is not supported in this test."); + } + } + + void RunMatMulAndFusedOps(const Tensor& x, const Tensor& y, + const FusedOpsAndTensors& fused_ops_and_tensors, + Tensor* result, bool transpose_x, + bool transpose_y) { + Scope root = tensorflow::Scope::NewRootScope(); + + Output x_input = + ops::Const(root.WithOpName("x_input"), Input::Initializer(x)); + Output y_input = + ops::Const(root.WithOpName("y_input"), Input::Initializer(y)); + Output last_output = ops::MatMul( + root.WithOpName("matmul"), x_input, y_input, + ops::MatMul::Attrs().TransposeA(transpose_x).TransposeB(transpose_y)); + auto& fused_ops = fused_ops_and_tensors.fused_ops; + auto& fusion_tensors = fused_ops_and_tensors.fusion_tensors; + for (int i = 0; i < fused_ops.size(); ++i) { + const string& op = fused_ops[i]; + if (op == "BiasAdd") { + Output arg = ops::Const(root.WithOpName(absl::StrCat("arg", i)), + Input::Initializer(fusion_tensors[i])); + last_output = ops::BiasAdd( + root.WithOpName(absl::StrCat("bias_add_at_", i)), last_output, arg); + } else if (op == "Relu" || op == "Relu6" || op == "LeakyRelu" || + op == "Elu" || op == "Tanh" || op == "Sigmoid" || + op == "GeluApproximate") { + last_output = + ActivationOp(root, op, last_output, absl::StrCat(op, "_at_", i)); + } else if (op == "Add") { + ASSERT_EQ(x.dtype(), fusion_tensors[i].dtype()); + Output arg = ops::Const(root.WithOpName(absl::StrCat("arg", i)), + Input::Initializer(fusion_tensors[i])); + last_output = ops::AddV2(root.WithOpName(absl::StrCat("add_at_", i)), + last_output, arg); + } else { + EXPECT_TRUE(false) << absl::StrCat("The fusion: [", + absl::StrJoin(fused_ops, ","), + "] is not supported in this test."); + } + } + std::vector outputs; + RunAndFetch(root, {last_output.name()}, &outputs); + *result = outputs[0]; + } + + void RunFusedMatMul(const Tensor& x, const Tensor& 
y, + const FusedOpsAndTensors& fused_ops_and_tensors, + Tensor* result, bool transpose_x, bool transpose_y) { + Scope root = tensorflow::Scope::NewRootScope(); + + DataType dtype = DataTypeToEnum::v(); + + Output x_input = + ops::Const(root.WithOpName("x_input"), Input::Initializer(x)); + Output y_input = + ops::Const(root.WithOpName("y_input"), Input::Initializer(y)); + auto& fused_ops = fused_ops_and_tensors.fused_ops; + auto& fusion_tensors = fused_ops_and_tensors.fusion_tensors; + int num_fusion_inputs = 0; + bool has_leaky_relu = false; + std::vector fusion_inputs; + for (int i = 0; i < fused_ops.size(); ++i) { + const string& op = fused_ops[i]; + if (op == "BiasAdd") { + Output arg = ops::Const(root.WithOpName(absl::StrCat("arg", i)), + Input::Initializer(fusion_tensors[i])); + fusion_inputs.push_back({arg.name(), 0, dtype}); + num_fusion_inputs++; + } else if (op == "Add") { + ASSERT_EQ(x.dtype(), fusion_tensors[i].dtype()); + Output arg = ops::Const(root.WithOpName(absl::StrCat("arg", i)), + Input::Initializer(fusion_tensors[i])); + fusion_inputs.push_back({arg.name(), 0, dtype}); + num_fusion_inputs++; + } else if (op == "LeakyRelu") { + has_leaky_relu = true; + } else { + bool is_supported = op == "Relu" || op == "Relu6" || + op == "LeakyRelu" || op == "Elu" || op == "Tanh" || + op == "Sigmoid" || op == "GeluApproximate"; + EXPECT_TRUE(is_supported) + << absl::StrCat("The fusion: [", absl::StrJoin(fused_ops, ","), + "] is not supported in this test."); + } + } + NodeDef fused_matmul; + std::vector add_nodes; + TF_EXPECT_OK(NodeDefBuilder("fused_batch_matmul", "_MklNativeFusedMatMul") + .Input({x_input.name(), 0, dtype}) + .Input({y_input.name(), 0, dtype}) + .Input(fusion_inputs) + .Attr("transpose_a", transpose_x) + .Attr("transpose_b", transpose_y) + .Attr("num_args", num_fusion_inputs) + .Attr("fused_ops", fused_ops) + .Attr("leakyrelu_alpha", + has_leaky_relu ? this->leakyrelu_alpha_ : 0.2f) + .Attr("_kernel", "MklNameChangeOp") + .Finalize(&fused_matmul)); + add_nodes = {&fused_matmul}; + std::vector outputs; + RunAndFetch(root, {fused_matmul.name()}, &outputs, add_nodes); + *result = outputs[0]; + } + + // Compute quantized tensor perchannel (aka axis) in SCALED mode for 2D + // tensor. + template + void GetPerchannelQuantizationTensors(const Tensor& input, Tensor* output, + Tensor* min_tensor, + Tensor* max_tensor) { + ASSERT_EQ(input.dims(), 2); + ASSERT_EQ(output->dtype(), DT_QINT8); + constexpr int axis = transpose ? 
0 : 1; + int num_channels = input.dim_size(axis); + ASSERT_EQ(min_tensor->NumElements(), num_channels); + ASSERT_EQ(max_tensor->NumElements(), num_channels); + + auto eigen_input_tensor = input.matrix().template cast(); + auto eigen_output_tensor = output->matrix(); + std::vector scales(num_channels); + float* min_tensor_buf = min_tensor->flat().data(); + float* max_tensor_buf = max_tensor->flat().data(); + for (int i = 0; i < num_channels; ++i) { + auto input_slice = eigen_input_tensor.template chip(i); + auto output_slice = eigen_output_tensor.template chip(i); + Eigen::Tensor min = input_slice.minimum(); + Eigen::Tensor max = input_slice.maximum(); + float min_i = min(); + float max_i = max(); + float range = std::max(std::abs(min_i), std::abs(max_i)); + min_tensor_buf[i] = -range; + max_tensor_buf[i] = range; + const float scale = 127.0f / range; + output_slice = (input_slice * scale).round().template cast(); + } + } + + void RunQuantizedMatMul(const Tensor& x, const Tensor& y, + const FusedOpsAndTensors& fused_ops_and_tensors, + Tensor* result, bool transpose_x, bool transpose_y, + string input_quant_mode, string output_quant_mode, + bool is_bias_quantized, bool is_perchannel, + bool requantize, float output_min, float output_max) { + // TODO(intel-tf): Extend test with quantized bias + ASSERT_EQ(is_bias_quantized, false); + + DataType real_dtype = DataTypeToEnum::v(); + DataType qinput_dtype = + (input_quant_mode == "MIN_FIRST") ? DT_QUINT8 : DT_QINT8; + // Quantize x and y + Tensor x_qtensor(qinput_dtype, x.shape()); + Tensor x_min_tensor(DT_FLOAT, TensorShape({})); + Tensor x_max_tensor(DT_FLOAT, TensorShape({})); + auto status = MklTestingUtil::GetQuantizationTensors( + x, &x_qtensor, qinput_dtype, input_quant_mode, &x_min_tensor, + &x_max_tensor); + ASSERT_TRUE(status.ok()); + Tensor y_qtensor(DT_QINT8, y.shape()); + const int num_channels = transpose_y ? y.dim_size(0) : y.dim_size(1); + TensorShape minmax_shape = + is_perchannel ? 
TensorShape({num_channels}) : TensorShape({}); + Tensor y_min_tensor(DT_FLOAT, minmax_shape); + Tensor y_max_tensor(DT_FLOAT, minmax_shape); + if (is_perchannel) { + if (transpose_y) { + GetPerchannelQuantizationTensors(y, &y_qtensor, &y_min_tensor, + &y_max_tensor); + } else { + GetPerchannelQuantizationTensors(y, &y_qtensor, &y_min_tensor, + &y_max_tensor); + } + } else { + auto status = MklTestingUtil::GetQuantizationTensors( + y, &y_qtensor, DT_QINT8, "SCALED", &y_min_tensor, &y_max_tensor); + ASSERT_TRUE(status.ok()); + } + + Scope root = tensorflow::Scope::NewRootScope(); + + Output x_input = + ops::Const(root.WithOpName("x_input"), Input::Initializer(x_qtensor)); + Output x_min = + ops::Const(root.WithOpName("x_min"), Input::Initializer(x_min_tensor)); + Output x_max = + ops::Const(root.WithOpName("x_max"), Input::Initializer(x_max_tensor)); + Output y_input = + ops::Const(root.WithOpName("y_input"), Input::Initializer(y_qtensor)); + Output y_min = + ops::Const(root.WithOpName("y_min"), Input::Initializer(y_min_tensor)); + Output y_max = + ops::Const(root.WithOpName("y_max"), Input::Initializer(y_max_tensor)); + auto& fused_ops = fused_ops_and_tensors.fused_ops; + auto& fusion_tensors = fused_ops_and_tensors.fusion_tensors; + int num_fusion_inputs = 0; + std::vector fusion_inputs; + bool has_leaky_relu = false; + for (int i = 0; i < fused_ops.size(); ++i) { + const string& op = fused_ops[i]; + if (op == "BiasAdd") { + Output arg = ops::Const(root.WithOpName(absl::StrCat("arg", i)), + Input::Initializer(fusion_tensors[i])); + fusion_inputs.push_back({arg.name(), 0, real_dtype}); + num_fusion_inputs++; + } else if (op == "Add") { + ASSERT_EQ(real_dtype, fusion_tensors[i].dtype()); + Output arg = ops::Const(root.WithOpName(absl::StrCat("arg", i)), + Input::Initializer(fusion_tensors[i])); + fusion_inputs.push_back({arg.name(), 0, real_dtype}); + num_fusion_inputs++; + } else if (op == "LeakyRelu") { + has_leaky_relu = true; + } + } + NodeDef fused_matmul; + std::vector add_nodes; + std::vector outputs; + std::vector inputs; + inputs.push_back({"x_input", 0, qinput_dtype}); + inputs.push_back({"y_input", 0, DT_QINT8}); + inputs.insert(std::end(inputs), std::begin(fusion_inputs), + std::end(fusion_inputs)); + inputs.push_back({"x_min", 0, DT_FLOAT}); + inputs.push_back({"x_max", 0, DT_FLOAT}); + inputs.push_back({"y_min", 0, DT_FLOAT}); + inputs.push_back({"y_max", 0, DT_FLOAT}); + std::vector extended_fused_ops(fused_ops); + DataType out_dtype; + if (requantize) { + if (output_quant_mode == "SCALED") { + out_dtype = DT_QINT8; + } else { + out_dtype = DT_QUINT8; + } + } else { + out_dtype = real_dtype; + } + std::vector output_dtypes; + if (requantize) { + Output out_min = ops::Const(root.WithOpName("output_min"), output_min); + Output out_max = ops::Const(root.WithOpName("output_max"), output_max); + inputs.push_back({"output_min", 0, DT_FLOAT}); + inputs.push_back({"output_max", 0, DT_FLOAT}); + extended_fused_ops.push_back("Requantize"); + output_dtypes = {out_dtype, DT_FLOAT, DT_FLOAT}; + } else { + extended_fused_ops.push_back("Dequantize"); + output_dtypes = {out_dtype}; + } + + TF_EXPECT_OK(NodeDefBuilder("quantized_fused_matmul", "_QuantizedMatMul") + .Attr("Tdevice_inputs", std::vector()) + .Input(FakeInput()) + .Input(inputs) + .Attr("Thost_outputs", output_dtypes) + .Attr("Tdevice_outputs", std::vector()) + .Attr("T1", qinput_dtype) + .Attr("T2", DT_QINT8) + .Attr("Tbias", real_dtype) + .Attr("Tout", out_dtype) + .Attr("U", real_dtype) + .Attr("transpose_a", transpose_x) + 
.Attr("transpose_b", transpose_y) + .Attr("fused_ops", extended_fused_ops) + .Attr("leakyrelu_alpha", + has_leaky_relu ? this->leakyrelu_alpha_ : 0.2f) + .Attr("input_quant_mode", input_quant_mode) + .Attr("output_quant_mode", output_quant_mode) + .Finalize(&fused_matmul)); + if (requantize) { + NodeDef dequantize; + TF_EXPECT_OK(NodeDefBuilder("dequantize", "Dequantize") + .Input({"quantized_fused_matmul", 0, out_dtype}) + .Input({"quantized_fused_matmul", 1, DT_FLOAT}) + .Input({"quantized_fused_matmul", 2, DT_FLOAT}) + .Attr("dtype", real_dtype) + .Attr("mode", output_quant_mode) + .Finalize(&dequantize)); + add_nodes = {&fused_matmul, &dequantize}; + RunAndFetch(root, {dequantize.name()}, &outputs, add_nodes); + } else { + add_nodes = {&fused_matmul}; + RunAndFetch(root, {fused_matmul.name()}, &outputs, add_nodes); + } + *result = outputs[0]; + } + + template + void VerifyTensorsNear(const std::vector& x_dims, + const std::vector& y_dims, + const FusedOpsAndDims& fused_ops_and_dims, + const GraphRunner& run_default, + const FusedGraphRunner& run_fused, bool transpose_x, + bool transpose_y, const double atol = 1e-5, + // The following arguments are used by quantized fusion + string input_quant_mode = "SCALED", + string output_quant_mode = "SCALED", + bool is_bias_quantized = false, + bool is_perchannel = false, bool requantize = false) { + srand(1234); + DataType dtype = DataTypeToEnum::v(); + TensorShape x_shape = TensorShape(x_dims); + TensorShape y_shape = TensorShape(y_dims); + + Tensor x_tensor(dtype, x_shape); + x_tensor.flat().setRandom(); + x_tensor.flat() -= x_tensor.flat().constant(static_cast(0.5)); + + Tensor y_tensor(dtype, y_shape); + y_tensor.flat().setRandom(); + y_tensor.flat() -= y_tensor.flat().constant(static_cast(0.5)); + + FusedOpsAndTensors fused_ops_and_tensors; + fused_ops_and_tensors.fused_ops = fused_ops_and_dims.fused_ops; + const auto& fused_ops = fused_ops_and_tensors.fused_ops; // Alias to field + const auto& fusion_dims = fused_ops_and_dims.fusion_dims; // Alias to field + auto& fusion_tensors = fused_ops_and_tensors.fusion_tensors; + for (int i = 0; i < fused_ops.size(); ++i) { + TensorShape arg_shape = TensorShape(fusion_dims[i]); + Tensor arg_tensor(dtype, arg_shape); + arg_tensor.flat().setRandom(); + arg_tensor.flat() -= + arg_tensor.flat().constant(static_cast(0.5)); + fusion_tensors.push_back(arg_tensor); + } + Tensor default_result; + run_default(x_tensor, y_tensor, fused_ops_and_tensors, &default_result, + transpose_x, transpose_y); + + Tensor fused_result; + if constexpr (std::is_same::value) { + float output_min = 1.0; + float output_max = 1.0 + std::numeric_limits::epsilon(); + if (requantize) { + T min; + T max; + MklTestingUtil::ComputeMinMax(default_result, &min, &max); + output_min = static_cast(min); + output_max = static_cast(max); + } + // Run quantized fusion + run_fused(x_tensor, y_tensor, fused_ops_and_tensors, &fused_result, + transpose_x, transpose_y, input_quant_mode, output_quant_mode, + is_bias_quantized, is_perchannel, requantize, output_min, + output_max); + } else { + // Run realnumber type fusion + run_fused(x_tensor, y_tensor, fused_ops_and_tensors, &fused_result, + transpose_x, transpose_y); + } + std::vector> tensor_pairs = { + {default_result, fused_result}}; + for (auto& pair : tensor_pairs) { + const Tensor& expected = pair.first; + const Tensor& evaluated = pair.second; + + ASSERT_EQ(expected.dtype(), evaluated.dtype()); + ASSERT_EQ(expected.shape(), evaluated.shape()); + + test::ExpectClose(expected, evaluated, 
atol); + } + } + + void GetFusionConfiguration(const std::vector& fused_ops, + const int row, const int col, + FusedOpsAndDims* fused_ops_and_dims) { + if (fused_ops == std::vector{"BiasAdd"}) { + *fused_ops_and_dims = {fused_ops, {std::vector{col}}}; + } else if (fused_ops == std::vector{"BiasAdd", "Relu"} || + fused_ops == std::vector{"BiasAdd", "Relu6"} || + fused_ops == std::vector{"BiasAdd", "LeakyRelu"} || + fused_ops == std::vector{"BiasAdd", "Elu"} || + fused_ops == std::vector{"BiasAdd", "Tanh"} || + fused_ops == std::vector{"BiasAdd", "Sigmoid"} || + fused_ops == std::vector{"BiasAdd", "GeluApproximate"}) { + *fused_ops_and_dims = { + fused_ops, {std::vector{col}, std::vector{0}}}; + } else if (fused_ops == std::vector{"BiasAdd", "Add"}) { + *fused_ops_and_dims = { + fused_ops, + {std::vector{col}, std::vector{row, col}}}; + } else { + EXPECT_TRUE(false) << absl::StrCat("The fusion: [", + absl::StrJoin(fused_ops, ","), + "] is not supported in this test."); + } + } + + void VerifyFusedMatMul(std::vector fused_ops) { + const GraphRunner run_default = + [&](const Tensor& x, const Tensor& y, + const FusedOpsAndTensors& fused_ops_and_tensors, Tensor* result, + bool transpose_x, bool transpose_y) { + this->RunMatMulAndFusedOps(x, y, fused_ops_and_tensors, result, + transpose_x, transpose_y); + }; + + const GraphRunner run_fused = + [&](const Tensor& x, const Tensor& y, + const FusedOpsAndTensors& fused_ops_and_tensors, Tensor* result, + bool transpose_x, bool transpose_y) { + this->RunFusedMatMul(x, y, fused_ops_and_tensors, result, transpose_x, + transpose_y); + }; + const double atol = std::is_same::value ? 1e-2 : 1e-5; + constexpr int M = 3; + constexpr int K = 4; + constexpr int N = 5; + bool transpose_x = false; // OpKernel does not support transpose_x. + std::vector x_dims; + std::vector y_dims; + FusedOpsAndDims fused_ops_and_dims; + GetFusionConfiguration(fused_ops, M, N, &fused_ops_and_dims); + for (bool transpose_y : {false, true}) { + x_dims = + transpose_x ? std::vector{K, M} : std::vector{M, K}; + y_dims = + transpose_y ? std::vector{N, K} : std::vector{K, N}; + VerifyTensorsNear(x_dims, y_dims, fused_ops_and_dims, + run_default, run_fused, transpose_x, + transpose_y, atol); + } + } + + // The following test runs with 32 configurations. 
+  // (1) input quantization mode : {"MIN_FIRST", "SCALED"}
+  // (2) output quantization mode : {"MIN_FIRST", "SCALED"}
+  // (3) weight quantization per_channel : {false, true}
+  // (4) output is requantized or dequantized:
+  //       false: dequantized
+  //       true: requantized
+  // (5) weight matrix is transposed : {false, true}
+  void VerifyQuantizedMatMul(std::vector<string> fused_ops) {
+    if (!HasQuantizationSupport()) {
+      GTEST_SKIP() << "oneDNN-based quantized ops are not enabled on this "
+                      "CPU.";
+    }
+    const GraphRunner run_default =
+        [&](const Tensor& x, const Tensor& y,
+            const FusedOpsAndTensors& fused_ops_and_tensors, Tensor* result,
+            bool transpose_x, bool transpose_y) {
+          this->RunMatMulAndFusedOps(x, y, fused_ops_and_tensors, result,
+                                     transpose_x, transpose_y);
+        };
+
+    const QuantizedGraphRunner run_quantized =
+        [&](const Tensor& x, const Tensor& y,
+            const FusedOpsAndTensors& fused_ops_and_tensors, Tensor* result,
+            bool transpose_x, bool transpose_y, string input_quant_mode,
+            string output_quant_mode, bool is_bias_quantized,
+            bool is_perchannel, bool requantize, float output_min,
+            float output_max) {
+          this->RunQuantizedMatMul(
+              x, y, fused_ops_and_tensors, result, transpose_x, transpose_y,
+              input_quant_mode, output_quant_mode, is_bias_quantized,
+              is_perchannel, requantize, output_min, output_max);
+        };
+
+    const double atol = 1e-2;
+    constexpr int M = 3;
+    constexpr int K = 4;
+    constexpr int N = 5;
+    bool transpose_x = false;  // OpKernel does not support transpose_x.
+    std::vector<int64_t> x_dims;
+    std::vector<int64_t> y_dims;
+    FusedOpsAndDims fused_ops_and_dims;
+    GetFusionConfiguration(fused_ops, M, N, &fused_ops_and_dims);
+    std::vector<bool> requantization_config;
+    if (fused_ops == std::vector<string>{"BiasAdd", "Add"}) {
+      // MatMul + BiasAdd + Add + Requantize fusion is not supported yet.
+      requantization_config = {false};
+    } else {
+      requantization_config = {false, true};
+    }
+    for (bool transpose_y : {false, true}) {
+      x_dims = transpose_x ? std::vector<int64_t>{K, M}
+                           : std::vector<int64_t>{M, K};
+      y_dims = transpose_y ? std::vector<int64_t>{N, K}
+                           : std::vector<int64_t>{K, N};
+      for (bool per_channel : {false, true}) {
+        for (string input_quant_mode : {"MIN_FIRST", "SCALED"}) {
+          for (string output_quant_mode : {"MIN_FIRST", "SCALED"}) {
+            for (bool requantize : requantization_config) {
+              VerifyTensorsNear(
+                  x_dims, y_dims, fused_ops_and_dims, run_default,
+                  run_quantized, transpose_x, transpose_y, atol,
+                  input_quant_mode, output_quant_mode, false, per_channel,
+                  requantize);
+            }
+          }
+        }
+      }
+    }
+  }
+};
+
+TYPED_TEST_SUITE_P(FusedMatMulOpsTest);
+
+// Realnumber typed test.
+TYPED_TEST_P(FusedMatMulOpsTest, BiasAddGeluApproximate) {
+  this->VerifyFusedMatMul({"BiasAdd", "GeluApproximate"});
+}
+
+// The following tests are for quantized fusions.
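+// Each test below checks (in sketch form) that the quantized pipeline
+//   Quantize -> _QuantizedMatMul(fused_ops) [-> Requantize] -> Dequantize
+// stays within `atol` of the unfused float graph, for every configuration
+// enumerated in VerifyQuantizedMatMul above.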
+TYPED_TEST_P(FusedMatMulOpsTest, Quantized_BiasAdd) { + this->VerifyQuantizedMatMul({"BiasAdd"}); +} + +TYPED_TEST_P(FusedMatMulOpsTest, Quantized_BiasAddRelu) { + this->VerifyQuantizedMatMul({"BiasAdd", "Relu"}); +} + +TYPED_TEST_P(FusedMatMulOpsTest, Quantized_BiasAddRelu6) { + this->VerifyQuantizedMatMul({"BiasAdd", "Relu6"}); +} + +TYPED_TEST_P(FusedMatMulOpsTest, Quantized_BiasAddLeakyRelu) { + this->VerifyQuantizedMatMul({"BiasAdd", "LeakyRelu"}); +} + +TYPED_TEST_P(FusedMatMulOpsTest, Quantized_BiasAddElu) { + this->VerifyQuantizedMatMul({"BiasAdd", "Elu"}); +} + +TYPED_TEST_P(FusedMatMulOpsTest, Quantized_BiasAddTanh) { + this->VerifyQuantizedMatMul({"BiasAdd", "Tanh"}); +} + +TYPED_TEST_P(FusedMatMulOpsTest, Quantized_BiasAddSigmoid) { + this->VerifyQuantizedMatMul({"BiasAdd", "Sigmoid"}); +} + +TYPED_TEST_P(FusedMatMulOpsTest, Quantized_BiasAddGeluApproximate) { + this->VerifyQuantizedMatMul({"BiasAdd", "GeluApproximate"}); +} + +TYPED_TEST_P(FusedMatMulOpsTest, Quantized_BiasAddAdd) { + this->VerifyQuantizedMatMul({"BiasAdd", "Add"}); +} + +REGISTER_TYPED_TEST_SUITE_P(FusedMatMulOpsTest, BiasAddGeluApproximate, + Quantized_BiasAdd, Quantized_BiasAddRelu, + Quantized_BiasAddRelu6, Quantized_BiasAddLeakyRelu, + Quantized_BiasAddElu, Quantized_BiasAddTanh, + Quantized_BiasAddSigmoid, + Quantized_BiasAddGeluApproximate, + Quantized_BiasAddAdd); + +// TODO(intel-tf): Add bfloat16 to Types when PR#56613 is merged. +using DataTypes = ::testing::Types; + +INSTANTIATE_TYPED_TEST_SUITE_P(Test, FusedMatMulOpsTest, DataTypes); + +} // namespace tensorflow + +#endif // INTEL_MKL diff --git a/tensorflow/core/ops/mkl_nn_ops.cc b/tensorflow/core/ops/mkl_nn_ops.cc index 868e15113b406c..14a42f29ddecbd 100644 --- a/tensorflow/core/ops/mkl_nn_ops.cc +++ b/tensorflow/core/ops/mkl_nn_ops.cc @@ -1937,6 +1937,46 @@ operation. expected to invoke these operators. )doc"); +REGISTER_OP("_QuantizedMatMul") + // Variable number of inputs depending on fusion. The inputs contain + // quantized or real tensors. Some of the inputs carry min-max values for + // quantized tensors. + .Input("device_inputs: Tdevice_inputs") + .Input("host_inputs: Thost_inputs") + // Variable number of outputs depending on the main output type. For + // example, quantized output will need additional tensors to carry min-max + // values. If the output type is real tensor (e.g. Dequantize fusion), the + // op should produce only single output tensor. + .Output("device_outputs: Tdevice_outputs") + .Output("host_outputs: Thost_outputs") + .Attr("Tdevice_inputs: list(type) >= 0 = []") + .Attr("Thost_inputs: list(type) >= 0 = []") + .Attr("Tdevice_outputs: list(type) >= 0 = []") + .Attr("Thost_outputs: list(type) >= 0 = []") + // The following attributes T1, T2, U, and Tout are members of Tinputs + // and Toutputs, used here for type constraints in the templatized OpKernel + // registrations. + .Attr("T1: quantizedtype") // 0-th input     + .Attr("T2: quantizedtype") // 1st input + .Attr("Tbias: {bfloat16, float, quantizedtype} = DT_FLOAT") + // Additional inputs' type. Currently, restricting all to be of same type. + .Attr("U: {bfloat16, float, quantizedtype} = DT_FLOAT") + .Attr("Tout: {bfloat16, float, quantizedtype} = DT_FLOAT") // 0-th output   + .Attr("transpose_a: bool = false") + .Attr("transpose_b: bool = false") + .Attr("is_weight_const: bool = true") + .Attr("is_bias_const: bool = true") + .Attr("fused_ops: list(string) = []") + // Attribute for quantization mode of all quantized input tensors. 
+    // Currently, all quantized operands are restricted to use the same
+    // quantization mode.
+    .Attr("input_quant_mode: {'MIN_FIRST', 'SCALED'} = 'SCALED'")
+    // Attribute for activation (0-th output) requantization mode
+    .Attr("output_quant_mode: {'MIN_FIRST', 'SCALED'} = 'SCALED'")
+    // Attributes for the LeakyRelu ----------------------------------------- //
+    .Attr("leakyrelu_alpha: float = 0.2")
+    // ---------------------------------------------------------------------- //
+    .SetShapeFn(shape_inference::MatMulShape);
+
 } // namespace tensorflow

 #endif // INTEL_MKL

From cf2678f166d5c13b7face1cebd23ce237108bd5c Mon Sep 17 00:00:00 2001
From: mdfaijul
Date: Tue, 30 Apr 2024 11:52:11 -0700
Subject: [PATCH 002/478] Fix round mode.

---
 tensorflow/core/kernels/mkl/mkl_kernel_util.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/mkl/mkl_kernel_util.cc b/tensorflow/core/kernels/mkl/mkl_kernel_util.cc
index 02e5d0a578df9c..504247ff6613ec 100644
--- a/tensorflow/core/kernels/mkl/mkl_kernel_util.cc
+++ b/tensorflow/core/kernels/mkl/mkl_kernel_util.cc
@@ -50,7 +50,7 @@ void MklTestingUtil::RunMklQuantizeOp(const Tensor& input,
                      .Input(max_node)
                      .Attr("T", type)
                      .Attr("mode", mode)
-                     .Attr("round_mode", "HALF_TO_EVEN")
+                     .Attr("round_mode", round_mode)
                      .Attr("_kernel", "QuantizedMklOp")
                      .Finalize(&*graph, &quantize_op));

From 803d7917db1890f9f1af6be55a3f6a0b086a0f75 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 6 May 2024 19:21:12 +0000
Subject: [PATCH 003/478] Bump werkzeug from 3.0.1 to 3.0.3

Bumps [werkzeug](https://github.com/pallets/werkzeug) from 3.0.1 to 3.0.3.
- [Release notes](https://github.com/pallets/werkzeug/releases)
- [Changelog](https://github.com/pallets/werkzeug/blob/main/CHANGES.rst)
- [Commits](https://github.com/pallets/werkzeug/compare/3.0.1...3.0.3)

---
updated-dependencies:
- dependency-name: werkzeug
  dependency-type: direct:production
...
Signed-off-by: dependabot[bot] --- requirements_lock_3_10.txt | 6 +++--- requirements_lock_3_11.txt | 6 +++--- requirements_lock_3_12.txt | 6 +++--- requirements_lock_3_9.txt | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/requirements_lock_3_10.txt b/requirements_lock_3_10.txt index 05dc3940487eef..f17468ddaafd0a 100644 --- a/requirements_lock_3_10.txt +++ b/requirements_lock_3_10.txt @@ -522,9 +522,9 @@ urllib3==2.2.0 \ --hash=sha256:051d961ad0c62a94e50ecf1af379c3aba230c66c710493493560c0c223c49f20 \ --hash=sha256:ce3711610ddce217e6d113a2732fafad960a03fd0318c91faa79481e35c11224 # via requests -werkzeug==3.0.1 \ - --hash=sha256:507e811ecea72b18a404947aded4b3390e1db8f826b494d76550ef45bb3b1dcc \ - --hash=sha256:90a285dc0e42ad56b34e696398b8122ee4c681833fb35b8334a095d82c56da10 +werkzeug==3.0.3 \ + --hash=sha256:097e5bfda9f0aba8da6b8545146def481d06aa7d3266e7448e2cccf67dd8bd18 \ + --hash=sha256:fc9645dc43e03e4d630d23143a04a7f947a9a3b5727cd535fdfe155a17cc48c8 # via tb-nightly wheel==0.41.3 \ --hash=sha256:488609bc63a29322326e05560731bf7bfea8e48ad646e1f5e40d366607de0942 \ diff --git a/requirements_lock_3_11.txt b/requirements_lock_3_11.txt index 05dc3940487eef..f17468ddaafd0a 100644 --- a/requirements_lock_3_11.txt +++ b/requirements_lock_3_11.txt @@ -522,9 +522,9 @@ urllib3==2.2.0 \ --hash=sha256:051d961ad0c62a94e50ecf1af379c3aba230c66c710493493560c0c223c49f20 \ --hash=sha256:ce3711610ddce217e6d113a2732fafad960a03fd0318c91faa79481e35c11224 # via requests -werkzeug==3.0.1 \ - --hash=sha256:507e811ecea72b18a404947aded4b3390e1db8f826b494d76550ef45bb3b1dcc \ - --hash=sha256:90a285dc0e42ad56b34e696398b8122ee4c681833fb35b8334a095d82c56da10 +werkzeug==3.0.3 \ + --hash=sha256:097e5bfda9f0aba8da6b8545146def481d06aa7d3266e7448e2cccf67dd8bd18 \ + --hash=sha256:fc9645dc43e03e4d630d23143a04a7f947a9a3b5727cd535fdfe155a17cc48c8 # via tb-nightly wheel==0.41.3 \ --hash=sha256:488609bc63a29322326e05560731bf7bfea8e48ad646e1f5e40d366607de0942 \ diff --git a/requirements_lock_3_12.txt b/requirements_lock_3_12.txt index 120ec6ebcd7c72..0d045ea1a0579c 100644 --- a/requirements_lock_3_12.txt +++ b/requirements_lock_3_12.txt @@ -530,9 +530,9 @@ urllib3==2.2.0 \ --hash=sha256:051d961ad0c62a94e50ecf1af379c3aba230c66c710493493560c0c223c49f20 \ --hash=sha256:ce3711610ddce217e6d113a2732fafad960a03fd0318c91faa79481e35c11224 # via requests -werkzeug==3.0.1 \ - --hash=sha256:507e811ecea72b18a404947aded4b3390e1db8f826b494d76550ef45bb3b1dcc \ - --hash=sha256:90a285dc0e42ad56b34e696398b8122ee4c681833fb35b8334a095d82c56da10 +werkzeug==3.0.3 \ + --hash=sha256:097e5bfda9f0aba8da6b8545146def481d06aa7d3266e7448e2cccf67dd8bd18 \ + --hash=sha256:fc9645dc43e03e4d630d23143a04a7f947a9a3b5727cd535fdfe155a17cc48c8 # via tb-nightly wheel==0.41.3 \ --hash=sha256:488609bc63a29322326e05560731bf7bfea8e48ad646e1f5e40d366607de0942 \ diff --git a/requirements_lock_3_9.txt b/requirements_lock_3_9.txt index 36a55514cd788b..48c74173fe553f 100644 --- a/requirements_lock_3_9.txt +++ b/requirements_lock_3_9.txt @@ -526,9 +526,9 @@ urllib3==2.2.0 \ --hash=sha256:051d961ad0c62a94e50ecf1af379c3aba230c66c710493493560c0c223c49f20 \ --hash=sha256:ce3711610ddce217e6d113a2732fafad960a03fd0318c91faa79481e35c11224 # via requests -werkzeug==3.0.1 \ - --hash=sha256:507e811ecea72b18a404947aded4b3390e1db8f826b494d76550ef45bb3b1dcc \ - --hash=sha256:90a285dc0e42ad56b34e696398b8122ee4c681833fb35b8334a095d82c56da10 +werkzeug==3.0.3 \ + --hash=sha256:097e5bfda9f0aba8da6b8545146def481d06aa7d3266e7448e2cccf67dd8bd18 \ + 
--hash=sha256:fc9645dc43e03e4d630d23143a04a7f947a9a3b5727cd535fdfe155a17cc48c8 # via tb-nightly wheel==0.41.3 \ --hash=sha256:488609bc63a29322326e05560731bf7bfea8e48ad646e1f5e40d366607de0942 \ From bfe2973e107bd6aaa12c29a3204a9b59ed018440 Mon Sep 17 00:00:00 2001 From: mdfaijul Date: Sat, 11 May 2024 13:41:38 -0700 Subject: [PATCH 004/478] Address review comments. --- tensorflow/core/kernels/mkl/mkl_kernel_util.h | 4 ++-- tensorflow/core/kernels/mkl/mkl_matmul_op_fused.cc | 8 ++++---- tensorflow/core/kernels/mkl/mkl_matmul_ops_common.h | 2 +- .../core/kernels/mkl/onednn_fused_matmul_ops_test.cc | 5 ++--- 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/tensorflow/core/kernels/mkl/mkl_kernel_util.h b/tensorflow/core/kernels/mkl/mkl_kernel_util.h index a3015a7b09025c..da600fb001e038 100644 --- a/tensorflow/core/kernels/mkl/mkl_kernel_util.h +++ b/tensorflow/core/kernels/mkl/mkl_kernel_util.h @@ -56,8 +56,8 @@ class MklTestingUtil { // their dtype set to DT_FLOAT. template static Status GetQuantizationTensors(const Tensor& input, Tensor* output, - DataType out_type, const string mode, - Tensor* min_tensor, Tensor* max_tensor) { + DataType out_type, const string mode, + Tensor* min_tensor, Tensor* max_tensor) { if (min_tensor->dtype() != DT_FLOAT || max_tensor->dtype() != DT_FLOAT) { return absl::UnimplementedError("Tensor must be float32."); } diff --git a/tensorflow/core/kernels/mkl/mkl_matmul_op_fused.cc b/tensorflow/core/kernels/mkl/mkl_matmul_op_fused.cc index 7e5544f1c3e3b4..a78f81d09db25f 100644 --- a/tensorflow/core/kernels/mkl/mkl_matmul_op_fused.cc +++ b/tensorflow/core/kernels/mkl/mkl_matmul_op_fused.cc @@ -27,7 +27,7 @@ limitations under the License. #include "tensorflow/core/kernels/mkl/mkl_matmul_ops_common.h" #include "tensorflow/core/kernels/mkl/mkl_quantized_conv_ops.h" #include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/gtl/inlined_vector.h" +#include "absl/container/inlined_vector.h" #include "tensorflow/core/platform/errors.h" namespace tensorflow { @@ -409,14 +409,14 @@ class QuantizedFusedMatMulOp struct OperandInfo { int idx = -1; // Operand tensor index if needed by a post-op. // Indices of min and max value tensors, if the operand is quantized. - gtl::InlinedVector min_max_indices; + absl::InlinedVector min_max_indices; } operand_info; // Indices of output min and max value tensors. It is used when requantize // is fused. 
- gtl::InlinedVector min_max_indices; + absl::InlinedVector min_max_indices; }; - gtl::InlinedVector post_op_info_list_; + absl::InlinedVector post_op_info_list_; void Initialize(OpKernelConstruction* context) { OP_REQUIRES_OK(context, diff --git a/tensorflow/core/kernels/mkl/mkl_matmul_ops_common.h b/tensorflow/core/kernels/mkl/mkl_matmul_ops_common.h index 6159b1d047ea65..dc4dfcb3663048 100644 --- a/tensorflow/core/kernels/mkl/mkl_matmul_ops_common.h +++ b/tensorflow/core/kernels/mkl/mkl_matmul_ops_common.h @@ -102,7 +102,7 @@ struct MklDnnMatMulFwdParams { struct PostOpParam { string name; std::vector param; - string partial_key = string(""); + string partial_key; }; std::vector post_op_params; string input_quant_mode; diff --git a/tensorflow/core/kernels/mkl/onednn_fused_matmul_ops_test.cc b/tensorflow/core/kernels/mkl/onednn_fused_matmul_ops_test.cc index af1d056ca7530a..c26535012b2aee 100644 --- a/tensorflow/core/kernels/mkl/onednn_fused_matmul_ops_test.cc +++ b/tensorflow/core/kernels/mkl/onednn_fused_matmul_ops_test.cc @@ -491,7 +491,6 @@ class FusedMatMulOpsTest : public OpsTestBase { string output_quant_mode = "SCALED", bool is_bias_quantized = false, bool is_perchannel = false, bool requantize = false) { - srand(1234); DataType dtype = DataTypeToEnum::v(); TensorShape x_shape = TensorShape(x_dims); TensorShape y_shape = TensorShape(y_dims); @@ -532,13 +531,13 @@ class FusedMatMulOpsTest : public OpsTestBase { output_min = static_cast(min); output_max = static_cast(max); } - // Run quantized fusion + // Run quantized fusion. run_fused(x_tensor, y_tensor, fused_ops_and_tensors, &fused_result, transpose_x, transpose_y, input_quant_mode, output_quant_mode, is_bias_quantized, is_perchannel, requantize, output_min, output_max); } else { - // Run realnumber type fusion + // Run realnumber type fusion. run_fused(x_tensor, y_tensor, fused_ops_and_tensors, &fused_result, transpose_x, transpose_y); } From 48fee16c70295881b78fe1a394480764a74114b1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 May 2024 02:02:12 -0700 Subject: [PATCH 005/478] Update GraphDef version to 1861. PiperOrigin-RevId: 633133739 --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index ad2911fae4c67f..aafe138460a6db 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 1860 // Updated: 2024/5/12 +#define TF_GRAPH_DEF_VERSION 1861 // Updated: 2024/5/13 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From c04d2ba725b3ed6dec7cab9162adbb42bcc29151 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 May 2024 02:02:12 -0700 Subject: [PATCH 006/478] compat: Update forward compatibility horizon to 2024-05-13 PiperOrigin-RevId: 633133741 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 21f795a3a55a41..2a9869afa5dfb4 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -29,7 +29,7 @@ # This value changes every day with an automatic CL. 
It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2024, 5, 12) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2024, 5, 13) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From 76a05ff1230d0c4100ebce395b992c05700fedca Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Mon, 13 May 2024 02:19:29 -0700 Subject: [PATCH 007/478] [XLA:NFC] Support generating LLVM CPU output for hlo-opt PiperOrigin-RevId: 633137810 --- third_party/xla/xla/tools/hlo_opt/BUILD | 7 +++++ .../xla/xla/tools/hlo_opt/cpu_llvm.hlo | 19 ++++++++++++++ third_party/xla/xla/tools/hlo_opt/cpu_opt.cc | 26 +++++++++++++++++++ 3 files changed, 52 insertions(+) create mode 100644 third_party/xla/xla/tools/hlo_opt/cpu_llvm.hlo diff --git a/third_party/xla/xla/tools/hlo_opt/BUILD b/third_party/xla/xla/tools/hlo_opt/BUILD index 06e2380b70a542..dd7f62731a27f9 100644 --- a/third_party/xla/xla/tools/hlo_opt/BUILD +++ b/third_party/xla/xla/tools/hlo_opt/BUILD @@ -100,10 +100,16 @@ cc_library( srcs = ["cpu_opt.cc"], deps = [ ":opt_lib", + "//xla/hlo/ir:hlo", "//xla/service:cpu_plugin", + "//xla/service:executable", "//xla/service:hlo_graph_dumper", + "//xla/service/cpu:cpu_executable", "//xla/stream_executor/host:host_platform", "//xla/stream_executor/platform", + "@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/strings", + "@local_tsl//tsl/platform:statusor", ], alwayslink = True, # Initializer needs to run. ) @@ -149,6 +155,7 @@ lit_test_suite( srcs = enforce_glob( [ "cpu_hlo.hlo", + "cpu_llvm.hlo", "gpu_hlo.hlo", "gpu_hlo_backend.hlo", "gpu_hlo_buffers.hlo", diff --git a/third_party/xla/xla/tools/hlo_opt/cpu_llvm.hlo b/third_party/xla/xla/tools/hlo_opt/cpu_llvm.hlo new file mode 100644 index 00000000000000..fbb033e53b07eb --- /dev/null +++ b/third_party/xla/xla/tools/hlo_opt/cpu_llvm.hlo @@ -0,0 +1,19 @@ +// RUN: hlo-opt %s --platform=cpu --stage=llvm-before-optimizations --split-input-file | FileCheck --check-prefixes=CHECK %s + +HloModule m + +add { + a = s8[] parameter(0) + b = s8[] parameter(1) + ROOT out = s8[] add(a, b) +} + +// CHECK: i8 +ENTRY e { + p1 = s8[1048576] parameter(0) + i = s8[] constant(0) + ROOT out = s8[] reduce(p1, i), dimensions={0}, to_apply=add +} + + + diff --git a/third_party/xla/xla/tools/hlo_opt/cpu_opt.cc b/third_party/xla/xla/tools/hlo_opt/cpu_opt.cc index 9e7d5c2b72ace7..fc5e540fc7907a 100644 --- a/third_party/xla/xla/tools/hlo_opt/cpu_opt.cc +++ b/third_party/xla/xla/tools/hlo_opt/cpu_opt.cc @@ -14,10 +14,19 @@ limitations under the License. 
==============================================================================*/ #include +#include +#include #include +#include +#include "absl/status/statusor.h" +#include "absl/strings/string_view.h" +#include "xla/hlo/ir/hlo_module.h" +#include "xla/service/cpu/cpu_executable.h" +#include "xla/service/executable.h" #include "xla/stream_executor/platform/initialize.h" #include "xla/tools/hlo_opt/opt_lib.h" +#include "tsl/platform/statusor.h" namespace xla { @@ -25,6 +34,23 @@ namespace { class CpuOptProvider : public OptProvider { public: + absl::StatusOr> GenerateStage( + std::unique_ptr module, absl::string_view s) override { + if (s == "llvm-before-optimizations") { + TF_ASSIGN_OR_RETURN(std::unique_ptr executable, + GetExecutable(std::move(module))); + return static_cast(executable.get()) + ->ir_module_string(); + } + return OptProvider::GenerateStage(std::move(module), s); + } + + std::set SupportedStages() override { + std::set supported = OptProvider::SupportedStages(); + supported.insert({"llvm-before-optimizations"}); + return supported; + } + std::string GetPlatformName() override { return "cpu"; } }; From f0e890dd698abda68b7204c68bf3d4e0fb1056af Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Mon, 13 May 2024 02:28:48 -0700 Subject: [PATCH 008/478] [XLA] Change default hlo-bisect output format to hlo PiperOrigin-RevId: 633139592 --- third_party/xla/xla/tools/hlo_bisect/hlo_bisect.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/xla/xla/tools/hlo_bisect/hlo_bisect.cc b/third_party/xla/xla/tools/hlo_bisect/hlo_bisect.cc index 73b018323f34c3..588e4f52999172 100644 --- a/third_party/xla/xla/tools/hlo_bisect/hlo_bisect.cc +++ b/third_party/xla/xla/tools/hlo_bisect/hlo_bisect.cc @@ -50,7 +50,7 @@ struct BisectOptions { std::string input = ""; std::string script = ""; std::string dump_path = "/tmp/hlo_bisect"; - std::string output_format = "pb"; + std::string output_format = "hlo"; bool all_computations = false; std::string test_platform = "CUDA"; std::string reference_platform = "Interpreter"; From 9213f23cdf5f1366ed569c2626963c660ba136fd Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Mon, 13 May 2024 02:57:51 -0700 Subject: [PATCH 009/478] [XLA:GPU] [NFC] Propagate Status from allocation failures properly Do not attempt to wrap/rewrap Status, as that loses the attached stack trace, and causes further errors. 
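For illustration, a sketch of the pattern being replaced (names elided;
not the exact code in this change):

    // Before: re-wrapping builds a new Status and drops the original payload.
    absl::StatusOr<se::OwningDeviceMemory> buffer = allocator->Allocate(...);
    if (!buffer.ok()) {
      return ResourceExhausted("%s", buffer.status().message());
    }
    // After: propagate the original Status, stack trace intact.
    TF_ASSIGN_OR_RETURN(se::OwningDeviceMemory buffer, allocator->Allocate(...));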
PiperOrigin-RevId: 633145137 --- third_party/xla/xla/service/gpu/gpu_executable.cc | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/third_party/xla/xla/service/gpu/gpu_executable.cc b/third_party/xla/xla/service/gpu/gpu_executable.cc index d815974e98aec7..4c56c84cdadbe8 100644 --- a/third_party/xla/xla/service/gpu/gpu_executable.cc +++ b/third_party/xla/xla/service/gpu/gpu_executable.cc @@ -754,16 +754,12 @@ absl::StatusOr GpuExecutable::BufferForAllocation( const int64_t buffer_size = allocation.size(); se::DeviceMemoryBase buffer_address; if (buffer_size > 0) { - absl::StatusOr buffer = + TF_ASSIGN_OR_RETURN( + se::OwningDeviceMemory buffer, memory_allocator->Allocate(device_ordinal, buffer_size, /*retry_on_failure=*/true, - /*memory_space=*/allocation.color()); - if (!buffer.ok()) { - return ResourceExhausted("%s\n%s\n", buffer.status().message(), - buffer_assignment_->ToVerboseString( - debug_buffer_assignment_show_max_)); - } - buffer_address = buffer->Release(); + /*memory_space=*/allocation.color())); + buffer_address = buffer.Release(); } return buffer_address; } From befd651123afc501e5e8d7f15a4a409f8654adc4 Mon Sep 17 00:00:00 2001 From: Johannes Reifferscheid Date: Mon, 13 May 2024 03:08:38 -0700 Subject: [PATCH 010/478] Add reduction group support. After this, all legacy fusions are supported by MLIR emitters. Currently, there is still one failure in the JAX test suite, but it is not a reduction fusion. PiperOrigin-RevId: 633147696 --- third_party/xla/xla/service/gpu/fusions/BUILD | 5 +- .../xla/xla/service/gpu/fusions/fusions.cc | 36 +- .../fusions/mlir/computation_partitioner.cc | 2 + .../gpu/fusions/mlir/mlir_fusion_emitter.cc | 6 +- .../xla/service/gpu/fusions/reduction_mlir.cc | 388 +++++++++--------- .../xla/service/gpu/fusions/reduction_mlir.h | 14 +- .../gpu/fusions/reduction_mlir_test.cc | 31 ++ 7 files changed, 258 insertions(+), 224 deletions(-) diff --git a/third_party/xla/xla/service/gpu/fusions/BUILD b/third_party/xla/xla/service/gpu/fusions/BUILD index 77e69877f3b5e3..cb89eb885d4e59 100644 --- a/third_party/xla/xla/service/gpu/fusions/BUILD +++ b/third_party/xla/xla/service/gpu/fusions/BUILD @@ -826,9 +826,9 @@ cc_library( srcs = ["reduction_mlir.cc"], hdrs = ["reduction_mlir.h"], deps = [ + ":fusion_emitter", ":reduction_base", "//xla:shape_util", - "//xla:status_macros", "//xla/hlo/ir:hlo", "//xla/service/gpu:hlo_fusion_analysis", "//xla/service/gpu:ir_emission_utils", @@ -842,6 +842,7 @@ cc_library( "//xla/service/gpu/model:indexing_map", "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/status", "@com_google_absl//absl/types:span", "@llvm-project//llvm:Support", @@ -865,11 +866,9 @@ xla_cc_test( ":reduction_mlir", "//xla:error_spec", "//xla/service:gpu_plugin", - "//xla/tests:filecheck", "//xla/tests:xla_internal_test_main", "@com_google_googletest//:gtest", "@local_tsl//tsl/lib/core:status_test_util", - "@local_tsl//tsl/platform:statusor", ], ) diff --git a/third_party/xla/xla/service/gpu/fusions/fusions.cc b/third_party/xla/xla/service/gpu/fusions/fusions.cc index a44477e0f9cc42..3567463575539f 100644 --- a/third_party/xla/xla/service/gpu/fusions/fusions.cc +++ b/third_party/xla/xla/service/gpu/fusions/fusions.cc @@ -141,24 +141,16 @@ absl::StatusOr> GetFusionEmitter( .GetModule() ->config() .debug_options(); - auto check_mlir_emitters = [&](std::function - support_check) { + auto check_mlir_emitters = 
[&](bool check = true) { if (!opts.xla_gpu_enable_mlir_emitters()) { return false; } - if (!mlir_converter::IsHloConversionSupported( - analysis.fusion(), - fusion_info.analysis().device_info().gpu_compute_capability())) { - VLOG(5) << "Skipping MLIR emission because the fusion contains " - "unsupported instructions."; - return false; - } - if (support_check && !support_check(analysis)) { - VLOG(5) << "Skipping MLIR emission because the fusion emitter does not " - "support " - "the fusion."; - return false; - } + CHECK(!check || + mlir_converter::IsHloConversionSupported( + analysis.fusion(), + fusion_info.analysis().device_info().gpu_compute_capability())) + << "Unsupported fusion: " + << analysis.fusion_root(0).instruction().parent()->ToString(); static int num_mlir_emitters = 0; if (is_emission_phase) { @@ -191,14 +183,14 @@ absl::StatusOr> GetFusionEmitter( return std::make_unique(); } case HloFusionAnalysis::EmitterFusionKind::kInputSlices: - if (check_mlir_emitters(nullptr)) { + if (check_mlir_emitters()) { return std::make_unique(analysis); } return std::make_unique(analysis); case HloFusionAnalysis::EmitterFusionKind::kLoop: { if (IsDynamicUpdateSliceFusion(analysis) && fusion_info.CanEmitDynamicUpdateSliceInPlace()) { - if (check_mlir_emitters(nullptr)) { + if (check_mlir_emitters()) { return std::make_unique( analysis); } @@ -209,30 +201,30 @@ absl::StatusOr> GetFusionEmitter( return *std::move(copy_fusion); } - if (check_mlir_emitters(nullptr)) { + if (check_mlir_emitters()) { return std::make_unique(analysis); } return std::make_unique(analysis); } case HloFusionAnalysis::EmitterFusionKind::kReduction: - if (check_mlir_emitters(MlirReductionFusion::IsSupported)) { + if (check_mlir_emitters()) { return std::make_unique(analysis); } return std::make_unique(analysis); case HloFusionAnalysis::EmitterFusionKind::kScatter: { - if (check_mlir_emitters(MlirScatterFusion::IsSupported)) { + if (check_mlir_emitters(false)) { return std::make_unique(analysis); } return std::make_unique(analysis); } case HloFusionAnalysis::EmitterFusionKind::kTranspose: { - if (check_mlir_emitters(nullptr)) { + if (check_mlir_emitters()) { return std::make_unique(analysis); } return std::make_unique(analysis); } case HloFusionAnalysis::EmitterFusionKind::kConcatenate: { - if (check_mlir_emitters(nullptr)) { + if (check_mlir_emitters()) { return std::make_unique(analysis); } return std::make_unique(analysis); diff --git a/third_party/xla/xla/service/gpu/fusions/mlir/computation_partitioner.cc b/third_party/xla/xla/service/gpu/fusions/mlir/computation_partitioner.cc index dffbb918b2d960..97c3cba8b3892f 100644 --- a/third_party/xla/xla/service/gpu/fusions/mlir/computation_partitioner.cc +++ b/third_party/xla/xla/service/gpu/fusions/mlir/computation_partitioner.cc @@ -321,6 +321,7 @@ PartitionedComputation::PartitionedComputation( PartitionedComputation::Subgraph PartitionedComputation::Subgraph::ForEpilogue( const EpilogueSpecification& epilogue) { + if (epilogue.roots.empty()) return {}; const auto* computation = epilogue.heroes.front()->parent(); PartitionedComputation::Subgraph subgraph; subgraph.name = llvm_ir::SanitizeFunctionName( @@ -407,6 +408,7 @@ PartitionedComputations::DeclareFunctions(mlir::ModuleOp module) const { auto create_funcs = [&](absl::Span subgraphs) { for (const auto& subgraph : subgraphs) { + if (subgraph.roots.empty()) continue; auto func_op = CreateSubgraphMlirFunction(subgraph, builder); func_op->setAttr("llvm.linkage", mlir::LLVM::LinkageAttr::get( module->getContext(), diff --git 
a/third_party/xla/xla/service/gpu/fusions/mlir/mlir_fusion_emitter.cc b/third_party/xla/xla/service/gpu/fusions/mlir/mlir_fusion_emitter.cc index d0969dde6517cb..286b3e211df558 100644 --- a/third_party/xla/xla/service/gpu/fusions/mlir/mlir_fusion_emitter.cc +++ b/third_party/xla/xla/service/gpu/fusions/mlir/mlir_fusion_emitter.cc @@ -480,7 +480,7 @@ absl::Status MlirFusionEmitterBase::EmitMlir( // The epilogue functions replace the root tuple. auto* root = fusion.fused_instructions_computation()->root_instruction(); - if (!epilogues.empty() && root->opcode() == HloOpcode::kTuple) { + if (root->opcode() == HloOpcode::kTuple && !epilogues.empty()) { subgraph_to_mlir_fn.extract(&computations.FindSubgraph(root)) .mapped() .erase(); @@ -497,6 +497,7 @@ absl::Status MlirFusionEmitterBase::EmitMlir( } } for (const auto& epilogue : computations.epilogues()) { + if (epilogue.roots.empty()) continue; TF_RETURN_IF_ERROR(mlir_converter::SubgraphToMlirFunction( computations.FindPartitionedComputation( fusion.fused_instructions_computation()), @@ -524,6 +525,9 @@ MlirFusionEmitterBase::EmitEpilogue( injected, ValueRange output_indices, mlir::ImplicitLocOpBuilder& builder) const { const auto& epilogue = computations.epilogues().at(epilogue_index); + if (epilogue.roots.empty()) { + return {}; + } auto epilogue_fn = mlir::cast( entry_fn->getParentOfType().lookupSymbol(epilogue.name)); SmallVector operands = ValueRange(entry_fn.getArguments().take_front( diff --git a/third_party/xla/xla/service/gpu/fusions/reduction_mlir.cc b/third_party/xla/xla/service/gpu/fusions/reduction_mlir.cc index bd75a7e2ecbea5..8c1dcdaf3b7bf9 100644 --- a/third_party/xla/xla/service/gpu/fusions/reduction_mlir.cc +++ b/third_party/xla/xla/service/gpu/fusions/reduction_mlir.cc @@ -15,14 +15,14 @@ limitations under the License. #include "xla/service/gpu/fusions/reduction_mlir.h" #include -#include +#include #include #include "absl/algorithm/container.h" #include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" #include "absl/status/status.h" #include "absl/types/span.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "mlir/Dialect/Arith/IR/Arith.h" // from @llvm-project @@ -40,6 +40,7 @@ limitations under the License. #include "mlir/IR/ValueRange.h" // from @llvm-project #include "xla/hlo/ir/hlo_instruction.h" #include "xla/hlo/ir/hlo_instructions.h" +#include "xla/service/gpu/fusions/fusion_emitter.h" #include "xla/service/gpu/fusions/mlir/computation_partitioner.h" #include "xla/service/gpu/fusions/mlir/elemental_hlo_to_mlir.h" #include "xla/service/gpu/fusions/mlir/ir/xla_gpu_ops.h" @@ -51,11 +52,11 @@ limitations under the License. 
#include "xla/service/gpu/model/indexing_map.h" #include "xla/service/gpu/reduction_utils.h" #include "xla/shape_util.h" -#include "xla/status_macros.h" namespace xla { namespace gpu { +namespace ma = mlir::arith; using llvm::SmallVector; using mlir::Value; using mlir::ValueRange; @@ -75,7 +76,9 @@ struct MlirReductionFusion::EmitterState { fusion(fusion), computations(computations), call_target(call_target), - builder(entry_function.getLoc(), entry_function) { + builder(entry_function.getLoc(), entry_function), + computation(computations.FindPartitionedComputation( + fusion.fused_instructions_computation())) { int index = 0; for (auto root : owner.analysis().fusion_roots()) { fusion_result_index_starts[root] = index; @@ -83,9 +86,11 @@ struct MlirReductionFusion::EmitterState { } } - // Uses the given indexing map to reduce a subset of the inputs in a single - // thread. The subset may be a single element. - HloValueMap EmitPerThreadReducedElements(const HloValueMap& inits); + // Reduces a subset of the inputs in a single thread. Also writes side outputs + // to the output tensors. The map contains the reduced values for reductions + // and the written tensors for side outputs. + HloValueMap EmitPerThreadReducedElements(int group_id, + const HloValueMap& inits); mlir::func::FuncOp GetReducer(const HloInstruction* hero) const { return call_target(hero->called_computations()[0]->root_instruction()); @@ -109,87 +114,107 @@ struct MlirReductionFusion::EmitterState { const PartitionedComputations& computations; const mlir_converter::CallTargetProvider& call_target; mlir::ImplicitLocOpBuilder builder; + const mlir_converter::PartitionedComputation& computation; absl::flat_hash_map fusion_result_index_starts; + SmallVector thread_and_block_ids; }; MlirReductionFusion::MlirReductionFusion(const HloFusionAnalysis& analysis) : ReductionFusionBase(analysis) { + CHECK(reduction_info().IsRaceFree()) + << "Non-race-free reductions should have been decomposed. Did " + "tree_reduction_rewriter run?"; + + const auto& groups = reduction_info().GetGroups(); + int num_groups = groups.grouped_roots.size(); + side_output_roots_.resize(num_groups); + reduction_heroes_.resize(num_groups); + reduction_roots_.resize(num_groups); + absl::flat_hash_set seen_heroes; - const auto& is_reduction_root = - reduction_info().GetGroups().is_reduction_root; - first_reduction_root_index_ = std::distance( - is_reduction_root.begin(), absl::c_find(is_reduction_root, true)); - for (auto [root, hero, is_reduction] : + for (auto [root, hero, is_reduction, group_id] : llvm::zip(analysis.fusion_roots(), analysis.fusion_heroes(), - reduction_info().GetGroups().is_reduction_root)) { - (is_reduction ? 
reduction_roots_ : side_output_roots_).push_back(root); - if (is_reduction && seen_heroes.insert(hero).second) { - reduction_heroes_.push_back(hero); + groups.is_reduction_root, groups.group_id_per_root)) { + if (is_reduction) { + if (seen_heroes.insert(hero).second) { + reduction_heroes_[group_id].push_back(hero); + } + reduction_roots_[group_id].push_back(root); + } else { + side_output_roots_[group_id].push_back(root); } } } -bool MlirReductionFusion::IsSupported(const HloFusionAnalysis& analysis) { - auto info = ReductionInfo::Create(analysis, /*for_mlir=*/true); - return info.GetGroups().grouped_roots.size() == 1 && info.IsRaceFree(); -} - std::vector MlirReductionFusion::GetEpilogues(const HloFusionInstruction& fusion, mlir::MLIRContext* mlir_context) const { - return {mlir_converter::EpilogueSpecification::FromOutputIndexing( - analysis(), reduction_heroes_, reduction_roots_, *this, mlir_context)}; + std::vector epilogues; + epilogues.reserve(reduction_heroes_.size()); + for (const auto& [heroes, roots] : + llvm::zip(reduction_heroes_, reduction_roots_)) { + epilogues.push_back( + mlir_converter::EpilogueSpecification::FromOutputIndexing( + analysis(), heroes, roots, *this, mlir_context)); + } + return epilogues; } absl::Status MlirReductionFusion::EmitEntryFunction( - const mlir_converter::PartitionedComputations& computations, + const PartitionedComputations& computations, const mlir_converter::CallTargetProvider& call_targets, mlir::func::FuncOp entry_function, const HloFusionInstruction& fusion) const { - // Reduction groups will probably be implemented in a separate pass, since - // they share nothing by definition. - TF_RET_CHECK(reduction_info().GetGroups().grouped_roots.size() == 1) - << "Only one reduction group is supported."; EmitterState state{*this, entry_function, fusion, computations, call_targets}; - state.builder.setInsertionPointToStart(entry_function.addEntryBlock()); - return EmitReduction(state); + auto& b = state.builder; + b.setInsertionPointToStart(entry_function.addEntryBlock()); + state.thread_and_block_ids = EmitThreadAndBlockIds(b); + if (reduction_heroes_.size() == 1) { + b.create(EmitReduction(0, state)); + return absl::OkStatus(); + } + SmallVector cases(reduction_heroes_.size() - 1); + absl::c_iota(cases, 1); // `default` is region 0. + auto switch_op = b.create( + entry_function.getResultTypes(), EmitBlockId(b, 1), cases, cases.size()); + b.create(switch_op.getResults()); + for (auto [id, region] : llvm::enumerate(switch_op->getRegions())) { + b.setInsertionPointToStart(®ion.emplaceBlock()); + b.create(EmitReduction(id, state)); + } + return absl::OkStatus(); } -absl::Status MlirReductionFusion::EmitReduction(EmitterState& state) const { - auto& builder = state.builder; +llvm::SmallVector MlirReductionFusion::EmitReduction( + int group_id, EmitterState& state) const { + auto& b = state.builder; const auto& tiling = reduction_info().GetTiling(); + const auto& threads_per_block = tiling.GetThreadsPerBlock(); + auto* ctx = state.entry_function.getContext(); // The number of warps working on one element in a row reduction. 
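  // For example (illustrative numbers, not from a specific fusion): with 256
  // threads along the minor reduced dimension and a warp size of 32, eight
  // warps cooperate on each row.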
- int num_warps_row = tiling.GetThreadsPerBlock() - [ReductionDimensions::kRowMinorReducedDimension] / - WarpSize(); - auto ctx = state.entry_function.getContext(); - - auto zero = builder.create(0); - auto lane_id = builder.create(); - auto is_first_lane = builder.create( - mlir::arith::CmpIPredicate::eq, lane_id, zero); - auto thread_id = EmitThreadId(builder, 0); - auto block_id = EmitBlockId(builder, 0); - Value cst_true = builder.create( - builder.getIntegerAttr(builder.getI1Type(), 1)); - + int num_warps_row = + threads_per_block[ReductionDimensions::kRowMinorReducedDimension] / + WarpSize(); + + auto zero = b.create(0); + auto lane_id = b.create(); + auto is_first_lane = + b.create(ma::CmpIPredicate::eq, lane_id, zero); + auto thread_id = state.thread_and_block_ids[0]; + Value cst_true = b.create(b.getOneAttr(b.getI1Type())); + + auto delinearized = + DelinearizeInBoundsIndex(mlir::getAffineDimExpr(0, ctx), + threads_per_block, tiling.GetThreadStrides()); auto thread_ids = mlir_converter::ApplyAffineMap( - mlir::AffineMap::get( - /*dimCount=*/1, /*symbolCount=*/0, - DelinearizeInBoundsIndex(mlir::getAffineDimExpr(0, ctx), - tiling.GetThreadsPerBlock(), - tiling.GetThreadStrides()), - ctx), - {thread_id}, {}, builder); - SmallVector thread_and_block_indices{thread_id, zero, zero, - block_id, zero, zero}; - - auto warp_id = builder.create( + mlir::AffineMap::get(1, 0, delinearized, ctx), {thread_id}, {}, b); + + auto warp_id = b.create( reduction_info().IsRowReduction() ? thread_ids[ReductionDimensions::kRowMinorReducedDimension] : thread_id, - builder.create(WarpSize())); + b.create(WarpSize())); std::vector shared_tile_size; SmallVector shared_write_indices; @@ -204,64 +229,60 @@ absl::Status MlirReductionFusion::EmitReduction(EmitterState& state) const { auto kKept = ReductionDimensions::kRowKeptDimension; shared_tile_size = {tiling.GetThreadsPerBlock()[kKept], num_warps_row}; shared_write_condition = is_first_lane; - shared_read_condition = builder.create( - mlir::arith::CmpIPredicate::ult, + shared_read_condition = b.create( + ma::CmpIPredicate::ult, thread_ids[ReductionDimensions::kRowMinorReducedDimension], - builder.create(num_warps_row)); + b.create(num_warps_row)); shared_write_indices = {thread_ids[kKept], warp_id}; shared_read_indices = {thread_ids[kKept], lane_id}; } - bool use_shared = !shared_tile_size.empty(); - - auto thread_has_output = mlir_converter::CheckConstraints( - *ComputeThreadIdToOutputIndexing(first_reduction_root_index_, ctx), - thread_and_block_indices, {}, builder); - - HloValueMap inits; - llvm::SmallVector outputs = - mlir::ValueRange(state.entry_function.getArguments().drop_front( - state.fusion.fused_parameters().size())); - HloValueMap root_output_indices; - llvm::SmallVector epilogue_input_dims; - const auto& epilogue = state.computations.epilogues().front(); - epilogue_input_dims = EmitThreadAndBlockIds(builder); - llvm::SmallVector epilogue_input_symbols( - epilogue.root_indexing.front().getNumSymbols(), zero); - for (auto [index, root] : llvm::enumerate(epilogue.roots)) { - root_output_indices[root] = mlir_converter::ApplyAffineMap( - epilogue.root_indexing[index], epilogue_input_dims, - epilogue_input_symbols, builder); - } - - for (auto [index, hero] : llvm::enumerate(reduction_heroes_)) { - int arity = hero->operand_count() / 2; - const auto& computation = - state.computations.FindPartitionedComputation(hero->parent()); - inits[hero] = - ProvideParameterRange(computation, hero, arity, arity, {}, - state.call_target, state.entry_function, 
builder); - } auto evaluate_epilogue = [&](const HloValueMap& results, llvm::SmallVector outputs) { - auto epilogue_indices = epilogue_input_dims; - epilogue_indices.append(epilogue_input_symbols); + const auto& epilogue = state.computations.epilogues()[group_id]; + if (epilogue.roots.empty()) return outputs; + + llvm::SmallVector epilogue_input_symbols( + epilogue.root_indexing.front().getNumSymbols(), zero); + auto epilogue_input_indices = state.thread_and_block_ids; + epilogue_input_indices.append(epilogue_input_symbols); auto values = - EmitEpilogue(/*epilogue_index=*/0, state.computations, - state.entry_function, results, epilogue_indices, builder); - const auto& epilogue = state.computations.epilogues().front(); - for (auto root : epilogue.roots) { + EmitEpilogue(group_id, state.computations, state.entry_function, + results, epilogue_input_indices, b); + int first_root_index = state.OutputIndex(epilogue.roots.front(), 0); + auto thread_has_output = mlir_converter::CheckConstraints( + *ComputeThreadIdToOutputIndexing(first_root_index, ctx), + state.thread_and_block_ids, {}, b); + for (auto [index, root] : llvm::enumerate(epilogue.roots)) { + auto output_indices = mlir_converter::ApplyAffineMap( + epilogue.root_indexing[index], state.thread_and_block_ids, + epilogue_input_symbols, b); for (auto [result_index, result] : llvm::enumerate(values.at(root))) { auto& output = outputs[state.OutputIndex(root, result_index)]; - output = builder.create( - thread_has_output, result, output, root_output_indices[root]); + output = b.create(thread_has_output, result, output, + output_indices); } } return outputs; }; - auto accumulated = state.EmitPerThreadReducedElements(inits); - for (auto root : side_output_roots_) { + HloValueMap inits; + const auto& reductions = reduction_heroes_[group_id]; + for (auto* hero : reductions) { + int arity = hero->operand_count() / 2; + inits[hero] = + ProvideParameterRange(state.computation, hero, arity, arity, {}, + state.call_target, state.entry_function, b); + } + llvm::SmallVector outputs = + mlir::ValueRange(state.entry_function.getArguments().drop_front( + state.fusion.fused_parameters().size())); + for (auto* side_output : side_output_roots_[group_id]) { + inits[side_output].push_back(outputs[state.OutputIndex(side_output, 0)]); + } + + auto accumulated = state.EmitPerThreadReducedElements(group_id, inits); + for (auto root : side_output_roots_[group_id]) { outputs[state.OutputIndex(root, 0)] = accumulated[root].front(); } @@ -269,40 +290,37 @@ absl::Status MlirReductionFusion::EmitReduction(EmitterState& state) const { // memory. In column reductions, the members of the warp process different // output elements, so we need to transpose first. if (reduction_info().IsRowReduction()) { - for (auto* hero : reduction_heroes_) { - auto reducer = state.GetReducer(hero); + for (auto* reduction : reductions) { + auto reducer = state.GetReducer(reduction); int max_dist = WarpSize() / 2 / reduction_info().GetRowsPerWarp(); - accumulated[hero] = - builder.create(reducer, accumulated[hero], max_dist) - .getResults(); + auto& values = accumulated[reduction]; + values = + b.create(reducer, values, max_dist).getResults(); } } - if (!use_shared) { - builder.create( - evaluate_epilogue(accumulated, std::move(outputs))); - return absl::OkStatus(); + if (shared_tile_size.empty()) { + return evaluate_epilogue(accumulated, std::move(outputs)); } SmallVector shared_tiles; // Write results to shared memory. 
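  // In a row reduction each warp has already combined its lanes via shuffles
  // above, so only the first lane of every warp stages its partial result in
  // the tile; a single warp then merges the per-warp partials after the sync.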
- for (auto hero : reduction_heroes_) { - const auto& result = accumulated[hero]; + for (auto* hero : reductions) { auto dest = state.AllocateSharedTiles(hero, shared_tile_size); - for (auto [value, output] : llvm::zip(result, dest)) { - shared_tiles.push_back(builder.create( + for (auto [value, output] : llvm::zip(accumulated[hero], dest)) { + shared_tiles.push_back(b.create( shared_write_condition, value, output, shared_write_indices)); } } // Wait for the entire tile to be written. auto synced_tiles = - builder.create(mlir::TypeRange(shared_tiles), shared_tiles) + b.create(mlir::TypeRange(shared_tiles), shared_tiles) .getResults(); auto write_outputs = [&](mlir::OpBuilder then_builder, mlir::Location loc) { mlir::ImplicitLocOpBuilder b(loc, then_builder); int tile_index = 0; - for (auto* hero : reduction_heroes_) { + for (auto* hero : reductions) { // Load from shared memory. SmallVector reduced; for (auto init : inits[hero]) { @@ -312,112 +330,98 @@ absl::Status MlirReductionFusion::EmitReduction(EmitterState& state) const { synced_tiles[tile_index++], shared_read_indices) .getResult()); } - accumulated[hero] = builder - .create(state.GetReducer(hero), - reduced, WarpSize() / 2) - .getResults(); + const auto& reducer = state.GetReducer(hero); + accumulated[hero] = + b.create(reducer, reduced, WarpSize() / 2) + .getResults(); } b.create(loc, evaluate_epilogue(accumulated, outputs)); }; - auto warp_writes = reduction_info().IsRowReduction() - ? builder.create( - mlir::arith::CmpIPredicate::eq, warp_id, zero) - : cst_true; - auto written = builder.create( - warp_writes, write_outputs, [&](mlir::OpBuilder b, mlir::Location loc) { - b.create(loc, outputs); - }); - builder.create(written.getResults()); - - return absl::OkStatus(); + auto warp_writes = + reduction_info().IsRowReduction() + ? 
b.create(ma::CmpIPredicate::eq, warp_id, zero) + : cst_true; + auto yield_outputs = [&](mlir::OpBuilder else_builder, mlir::Location loc) { + else_builder.create(loc, outputs); + }; + return b.create(warp_writes, write_outputs, yield_outputs) + .getResults(); } HloValueMap MlirReductionFusion::EmitterState::EmitPerThreadReducedElements( - const HloValueMap& inits) { + int group_id, const HloValueMap& inits) { const auto& tiling = owner.reduction_info().GetTiling(); auto tile_indexing = GetIndexingMapForTiling(tiling, builder.getContext()); + tile_indexing + .GetMutableDimensionBound( + KernelFusionInterface::kIndexingMapBlockIdxDims[1]) + .upper = owner.reduction_heroes_.size(); SmallVector iter_arg_inits; - ValueRange output_args = entry_function.getArguments().drop_front( - fusion.fused_parameters().size()); - for (auto [is_reduction, hero] : - llvm::zip(owner.reduction_info().GetGroups().is_reduction_root, - owner.analysis().fusion_heroes())) { - if (is_reduction) { - iter_arg_inits.append(inits.at(hero)); - } else { - iter_arg_inits.push_back(output_args[OutputIndex(hero, 0)]); - } + const auto& side_outputs = owner.side_output_roots_[group_id]; + const auto& reductions = owner.reduction_heroes_[group_id]; + absl::flat_hash_map iter_arg_starts; + for (const auto& [hero, init] : inits) { + iter_arg_starts[hero] = iter_arg_inits.size(); + iter_arg_inits.append(init); } - const auto& computation = computations.FindPartitionedComputation( - fusion.fused_instructions_computation()); - auto body_builder = [&](ValueRange iter_args, ValueRange dim_values, ValueRange symbol_values) -> SmallVector { auto tile_indices = mlir_converter::ApplyAffineMap( tile_indexing.GetAffineMap(), dim_values, symbol_values, builder); llvm::SmallVector results(iter_args.size(), nullptr); - struct SideOutput { - llvm::SmallVector indices; - Value scalar; - int result_index; - }; - llvm::SmallVector side_outputs; - for (auto [is_reduction, hero, root] : - llvm::zip(owner.reduction_info().GetGroups().is_reduction_root, - owner.analysis().fusion_heroes(), - owner.analysis().fusion_roots())) { - const xla::Shape& input_shape = + auto get_input_indices = [&](auto* hero, bool is_reduction) { + const auto& input_shape = is_reduction ? hero->operand(0)->shape() : hero->shape(); - auto input_indices = mlir_converter::ApplyAffineMap( + return mlir_converter::ApplyAffineMap( GetBitcastMap(tiling.GetXlaShape(), input_shape, builder.getContext()) .GetAffineMap(), tile_indices, {}, builder); - int start = fusion_result_index_starts[root]; - if (is_reduction) { - int num_outs = hero->operand_count() / 2; - auto values = ProvideParameterRange( - computations.FindPartitionedComputation(hero->parent()), hero, 0, - num_outs, input_indices, call_target, entry_function, builder); - SmallVector reduce_args = iter_args.slice(start, num_outs); - reduce_args.append(values); - absl::c_copy(builder.create(GetReducer(hero), reduce_args) - .getResults(), - results.begin() + start); - } else { - auto* root_tuple = fusion.fused_expression_root(); - Value value = mlir_converter::ProvideParameter( - computation, root_tuple, root_tuple->operand_index(hero), - input_indices, call_target, entry_function, builder)[0]; - // Tensor insertions turn into writes, so they have to happen in the - // end. This could be considered a bug in the lowering, but since we - // don't have bufferization, we need to handle it here. 
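      // (Concretely: all side-output scalars are computed first, and the
      // corresponding tensor insert ops are emitted only at the end of the
      // loop body, so no write can precede a read of the same tensor.)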
- side_outputs.push_back({std::move(input_indices), value, start}); - } + }; + for (auto* reduction : reductions) { + int arity = reduction->operand_count() / 2; + int start = iter_arg_starts[reduction]; + SmallVector reduce_args = iter_args.slice(start, arity); + reduce_args.append(ProvideParameterRange( + computation, reduction, 0, arity, get_input_indices(reduction, true), + call_target, entry_function, builder)); + const auto& reducer = GetReducer(reduction); + absl::c_copy( + builder.create(reducer, reduce_args).getResults(), + results.begin() + start); + } + struct SideOutput { + llvm::SmallVector indices; + Value scalar; + }; + llvm::SmallVector side_output_values; + for (auto* side_output : side_outputs) { + auto indices = get_input_indices(side_output, false); + auto* root_tuple = fusion.fused_expression_root(); + Value value = mlir_converter::ProvideParameter( + computation, root_tuple, root_tuple->operand_index(side_output), + indices, call_target, entry_function, builder)[0]; + side_output_values.push_back({std::move(indices), value}); } - for (auto& side_output : side_outputs) { - int index = side_output.result_index; - results[index] = builder.create( - side_output.scalar, iter_args[index], side_output.indices); + for (const auto& [side_output, values] : + llvm::zip(side_outputs, side_output_values)) { + int offset = iter_arg_starts[side_output]; + results[offset] = builder.create( + values.scalar, iter_args[offset], values.indices); } return results; }; - auto results = owner.EmitThreadLoopNest(builder, iter_arg_inits, - tile_indexing, body_builder); - mlir::ValueRange result_range = results; + auto results_vector = owner.EmitThreadLoopNest(builder, iter_arg_inits, + tile_indexing, body_builder); + mlir::ValueRange results = results_vector; HloValueMap results_per_hero; - for (auto [is_reduction, hero] : - llvm::zip(owner.reduction_info().GetGroups().is_reduction_root, - owner.analysis().fusion_heroes())) { - int num_outs = - hero->shape().IsTuple() ? hero->shape().tuple_shapes_size() : 1; - results_per_hero[hero] = result_range.take_front(num_outs); - result_range = result_range.drop_front(num_outs); + for (const auto& [hero, init] : inits) { + results_per_hero[hero] = results.slice(iter_arg_starts[hero], init.size()); } return results_per_hero; } diff --git a/third_party/xla/xla/service/gpu/fusions/reduction_mlir.h b/third_party/xla/xla/service/gpu/fusions/reduction_mlir.h index b17285a3bc68ae..12bc8124064245 100644 --- a/third_party/xla/xla/service/gpu/fusions/reduction_mlir.h +++ b/third_party/xla/xla/service/gpu/fusions/reduction_mlir.h @@ -54,13 +54,15 @@ class MlirReductionFusion struct EmitterState; friend struct EmitterState; - absl::Status EmitReduction(EmitterState& state) const; + llvm::SmallVector EmitReduction(int group_id, + EmitterState& state) const; - std::vector reduction_heroes_; - // The roots that have reduction heroes. - std::vector reduction_roots_; - std::vector side_output_roots_; - int first_reduction_root_index_; + // The reduction heroes for each reduction group. + std::vector> reduction_heroes_; + // The roots that have reduction heroes for each reduction group. + std::vector> reduction_roots_; + // The side output roots for each reduction group. 
+ std::vector> side_output_roots_; }; } // namespace gpu diff --git a/third_party/xla/xla/service/gpu/fusions/reduction_mlir_test.cc b/third_party/xla/xla/service/gpu/fusions/reduction_mlir_test.cc index 1fb1f2544bcda1..45b46267a2cfa7 100644 --- a/third_party/xla/xla/service/gpu/fusions/reduction_mlir_test.cc +++ b/third_party/xla/xla/service/gpu/fusions/reduction_mlir_test.cc @@ -153,6 +153,37 @@ TEST_F(ReductionTest, RowReduceMOFEpilogue) { EXPECT_TRUE(RunAndCompareNoHloPasses(kHloString, ErrorSpec{1e-3})); } +TEST_F(ReductionTest, RowReduceMOFGroups) { + constexpr auto kHloString = R"( + %add_f32 { + %x = f32[] parameter(0) + %y = f32[] parameter(1) + ROOT %add = f32[] add(%x, %y) + } + + %fused_computation { + %param0 = f32[1024] parameter(0) + %param1 = f32[1024] parameter(1) + %constant0 = f32[] constant(0) + %reduce1 = f32[] reduce(%param0, %constant0), dimensions={0}, to_apply=%add_f32 + %reduce2 = f32[] reduce(%param1, %constant0), dimensions={0}, to_apply=%add_f32 + ROOT %tuple = (f32[], f32[]) tuple(%reduce1, %reduce2) + } + + ENTRY %cluster { + %param0 = f32[1024] parameter(0) + %param1 = f32[1024] parameter(1) + ROOT %fusion = (f32[], f32[]) + fusion(%param0, %param1), kind=kInput, calls=%fused_computation + })"; + TF_ASSERT_OK(EmitAndCheckIR(kHloString, R"( + // CHECK: scf.index_switch %block_id_y + // CHECK: case 1 { + // CHECK: default { + )")); + EXPECT_TRUE(RunAndCompareNoHloPasses(kHloString, ErrorSpec{1e-3})); +} + TEST_F(ReductionTest, ColumnReduction) { constexpr auto kHloString = R"( HloModule Test, is_scheduled=true From 35f634a2cb446e6d15dd820ed22cf8b2aa3938d5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 May 2024 03:18:13 -0700 Subject: [PATCH 011/478] Update ops-related pbtxt files. PiperOrigin-RevId: 633149675 --- tensorflow/core/ops/ops.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 2c64cab5e211e2..0556051577f5e4 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -1,4 +1,4 @@ -go/nodeserialize +go/debugstr op { name: "Abort" attr { From d6a65049b15ec86e6ef455f7268a9273cdbcd1fb Mon Sep 17 00:00:00 2001 From: Quentin Khan Date: Mon, 13 May 2024 04:02:10 -0700 Subject: [PATCH 012/478] When no weight cache is provided to XNNPack, create one to share packed weights between operations. 
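A usage sketch (illustrative only: the `weight_cache_file_path` option field
used below is an assumption for this example; see the xnnpack_delegate.h
change in this commit for the actual interface):

    #include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"

    TfLiteXNNPackDelegateOptions opts = TfLiteXNNPackDelegateOptionsDefault();
    opts.weight_cache_file_path = "/tmp/model.xnn_weights";  // assumed field
    TfLiteDelegate* delegate = TfLiteXNNPackDelegateCreate(&opts);
    // ... build the interpreter, ModifyGraphWithDelegate(delegate), invoke ...
    TfLiteXNNPackDelegateDelete(delegate);

The first run packs the weights and writes them to the file; subsequent runs
mmap the file and skip repacking.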
PiperOrigin-RevId: 633158328 --- tensorflow/lite/core/c/common.h | 2 + tensorflow/lite/core/interpreter_builder.cc | 3 +- tensorflow/lite/core/subgraph.cc | 6 +- tensorflow/lite/core/subgraph.h | 17 +- tensorflow/lite/delegates/xnnpack/BUILD | 41 + .../lite/delegates/xnnpack/weight_cache.cc | 491 ++++++++++++ .../lite/delegates/xnnpack/weight_cache.h | 306 ++++++++ .../delegates/xnnpack/weight_cache_schema.fbs | 52 ++ .../xnnpack/weight_cache_schema_generated.h | 422 ++++++++++ .../delegates/xnnpack/weight_cache_test.cc | 725 ++++++++++++++++++ .../delegates/xnnpack/xnnpack_delegate.cc | 54 +- .../lite/delegates/xnnpack/xnnpack_delegate.h | 8 + tensorflow/lite/tflite_with_xnnpack.cc | 4 + tensorflow/opensource_only.files | 1 + 14 files changed, 2124 insertions(+), 8 deletions(-) create mode 100644 tensorflow/lite/delegates/xnnpack/weight_cache.cc create mode 100644 tensorflow/lite/delegates/xnnpack/weight_cache.h create mode 100644 tensorflow/lite/delegates/xnnpack/weight_cache_schema.fbs create mode 100755 tensorflow/lite/delegates/xnnpack/weight_cache_schema_generated.h create mode 100644 tensorflow/lite/delegates/xnnpack/weight_cache_test.cc diff --git a/tensorflow/lite/core/c/common.h b/tensorflow/lite/core/c/common.h index ea54be9490ef01..96f19f12336bc4 100644 --- a/tensorflow/lite/core/c/common.h +++ b/tensorflow/lite/core/c/common.h @@ -472,6 +472,8 @@ typedef enum TfLiteCustomAllocationFlags { kTfLiteCustomAllocationFlagsSkipAlignCheck = 1, } TfLiteCustomAllocationFlags; +enum { kTfLiteNoBufferIdentifier = SIZE_MAX }; + /// A tensor in the interpreter system which is a wrapper around a buffer of /// data including a dimensionality (or NULL if not currently defined). #ifndef TF_LITE_STATIC_MEMORY diff --git a/tensorflow/lite/core/interpreter_builder.cc b/tensorflow/lite/core/interpreter_builder.cc index 41e62cfd675340..d8c6d181ebdd1a 100644 --- a/tensorflow/lite/core/interpreter_builder.cc +++ b/tensorflow/lite/core/interpreter_builder.cc @@ -691,7 +691,8 @@ TfLiteStatus InterpreterBuilder::ParseTensors( if (subgraph->SetTensorParametersReadOnly( i, type, get_name(tensor), dims, quantization, buffer_ptr, - buffer_size, allocation_, sparsity) != kTfLiteOk) { + buffer_size, allocation_, sparsity, + /*buffer_identifier=*/tensor->buffer()) != kTfLiteOk) { TF_LITE_REPORT_ERROR(error_reporter_, "Tensor %d is invalidly specified in schema.\n", i); diff --git a/tensorflow/lite/core/subgraph.cc b/tensorflow/lite/core/subgraph.cc index 26ba2037342405..ce3622105b1ce5 100644 --- a/tensorflow/lite/core/subgraph.cc +++ b/tensorflow/lite/core/subgraph.cc @@ -1856,7 +1856,8 @@ TfLiteStatus Subgraph::GetNodeAndRegistration( TfLiteStatus Subgraph::SetTensorParametersReadOnly( int tensor_index, TfLiteType type, const char* name, const size_t ndims, const int* dims, TfLiteQuantization quantization, const char* buffer, - size_t bytes, const Allocation* allocation, TfLiteSparsity* sparsity) { + size_t bytes, const Allocation* allocation, TfLiteSparsity* sparsity, + const size_t buffer_identifier) { // Ensure quantization cleanup on failure. 
ScopedTfLiteQuantization scoped_quantization(&quantization); ScopedTfLiteSparsity scoped_sparsity(sparsity); @@ -1904,6 +1905,9 @@ TfLiteStatus Subgraph::SetTensorParametersReadOnly( tensor.quantization = *scoped_quantization.release(); tensor.sparsity = scoped_sparsity.release(); } + if (buffer_identifier != kTfLiteNoBufferIdentifier) { + tensor_buffer_identifiers_[tensor_index] = buffer_identifier; + } return kTfLiteOk; } diff --git a/tensorflow/lite/core/subgraph.h b/tensorflow/lite/core/subgraph.h index 5940bfbb232ca3..281ac04adc2096 100644 --- a/tensorflow/lite/core/subgraph.h +++ b/tensorflow/lite/core/subgraph.h @@ -23,6 +23,7 @@ limitations under the License. #include #include #include +#include #include #include #include @@ -132,16 +133,18 @@ class Subgraph { int tensor_index, TfLiteType type, const char* name, const std::vector& dims, TfLiteQuantization quantization, const char* buffer, size_t bytes, const Allocation* allocation = nullptr, - TfLiteSparsity* sparsity = nullptr) { + TfLiteSparsity* sparsity = nullptr, + size_t buffer_identifier = kTfLiteNoBufferIdentifier) { return SetTensorParametersReadOnly(tensor_index, type, name, dims.size(), dims.data(), quantization, buffer, bytes, - allocation, sparsity); + allocation, sparsity, buffer_identifier); } TfLiteStatus SetTensorParametersReadOnly( int tensor_index, TfLiteType type, const char* name, const size_t ndims, const int* dims, TfLiteQuantization quantization, const char* buffer, size_t bytes, const Allocation* allocation = nullptr, - TfLiteSparsity* sparsity = nullptr); + TfLiteSparsity* sparsity = nullptr, + size_t buffer_identifier = kTfLiteNoBufferIdentifier); // Set description of inputs/outputs/data/fptrs for node `node_index`. // This variant assumes an external buffer has been allocated of size @@ -589,6 +592,10 @@ class Subgraph { // Returns true if the subgraph has been fully delegated. bool IsFullyDelegated() const; + const std::unordered_map& GetTensorBufferIdentifiers() { + return tensor_buffer_identifiers_; + } + private: #ifndef DOXYGEN_SKIP friend class tflite::impl::InterpreterBuilder; @@ -1153,6 +1160,10 @@ class Subgraph { /// The allocator used for holding memory of the model. Note that this will /// be null if the client provides a tflite::Model directly. const Allocation* allocation_ = nullptr; + + // Maps tensor constant buffers used in the subgraph to a model-wide + // identifiers. 
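+  // (Keyed by tensor index; the value is the index of the tensor's constant
+  // buffer in the model flatbuffer, which serves as the model-wide id.)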
+ std::unordered_map tensor_buffer_identifiers_; }; } // namespace tflite diff --git a/tensorflow/lite/delegates/xnnpack/BUILD b/tensorflow/lite/delegates/xnnpack/BUILD index c4f748280d70ec..911a9358129a29 100644 --- a/tensorflow/lite/delegates/xnnpack/BUILD +++ b/tensorflow/lite/delegates/xnnpack/BUILD @@ -1,3 +1,4 @@ +load("@flatbuffers//:build_defs.bzl", "flatbuffer_cc_library") load("//tensorflow:tensorflow.default.bzl", "get_compatible_with_portable") load("//tensorflow/lite:build_def.bzl", "tflite_copts") load("//tensorflow/lite:special_rules.bzl", "internal_visibility_allowlist", "tflite_portable_test_suite_combined") @@ -251,6 +252,7 @@ cc_library( ":tflite_with_xnnpack_qs8", ":tflite_with_xnnpack_qu8", ":tflite_with_xnnpack_transient_indirection_buffer", + ":weight_cache", "//tensorflow/lite:kernel_api", "//tensorflow/lite:minimal_logging", "//tensorflow/lite/c:c_api_types", @@ -289,6 +291,7 @@ cc_library( linkstatic = True, deps = [ ":quantization_util", + ":weight_cache", "//tensorflow/lite:kernel_api", "//tensorflow/lite:minimal_logging", "//tensorflow/lite/c:c_api_types", @@ -323,6 +326,30 @@ cc_library( ], ) +flatbuffer_cc_library( + name = "weight_cache_schema", + srcs = ["weight_cache_schema.fbs"], + compatible_with = get_compatible_with_portable(), + flatc_args = [ + "--gen-mutable", + "--gen-object-api", + ], +) + +cc_library( + name = "weight_cache", + srcs = ["weight_cache.cc"], + hdrs = ["weight_cache.h"], + compatible_with = get_compatible_with_portable(), + deps = [ + ":weight_cache_schema", + "//tensorflow/lite:minimal_logging", + "//tensorflow/lite/c:common", + "@XNNPACK", + "@flatbuffers//:runtime_cc", + ], +) + ################################ Tester classes ################################ cc_library( @@ -2828,4 +2855,18 @@ cc_test( ], ) +cc_test( + name = "weight_cache_test", + srcs = ["weight_cache_test.cc"], + deps = [ + ":test_main", + ":weight_cache", + ":weight_cache_schema", + "//tensorflow/lite/c:common", + "@XNNPACK", + "@com_google_googletest//:gtest", + "@flatbuffers//:runtime_cc", + ], +) + tflite_portable_test_suite_combined(combine_conditions = {"deps": [":test_main"]}) diff --git a/tensorflow/lite/delegates/xnnpack/weight_cache.cc b/tensorflow/lite/delegates/xnnpack/weight_cache.cc new file mode 100644 index 00000000000000..cb178662831839 --- /dev/null +++ b/tensorflow/lite/delegates/xnnpack/weight_cache.cc @@ -0,0 +1,491 @@ +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tensorflow/lite/delegates/xnnpack/weight_cache.h" + +#include +#include + +#if defined(_MSC_VER) +#include +#else +#include +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "xnnpack.h" // from @XNNPACK +#include "flatbuffers/base.h" // from @flatbuffers +#include "flatbuffers/flatbuffer_builder.h" // from @flatbuffers +#include "flatbuffers/verifier.h" // from @flatbuffers +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/delegates/xnnpack/weight_cache_schema_generated.h" +#include "tensorflow/lite/logger.h" +#include "tensorflow/lite/minimal_logging.h" + +#define XNNPACK_ABORT_CHECK(TEST, ...) \ + if (!(TEST)) { \ + TFLITE_LOG_PROD(tflite::TFLITE_LOG_ERROR, __VA_ARGS__); \ + std::abort(); \ + } + +namespace tflite::xnnpack { + +namespace { +constexpr size_t kMinAlignment = 64; + +template +class ScopeGuard { + public: + explicit ScopeGuard(F&& callback) : callback_(std::forward(callback)) {} + ~ScopeGuard() { + if (active_) { + callback_(); + } + } + + void Deactivate() { active_ = false; } + + private: + F callback_; + bool active_ = true; +}; + +template +ScopeGuard(F&&) -> ScopeGuard; + +} // namespace + +void swap(MMapHandle& a, MMapHandle& b) { + using std::swap; + swap(a.size_, b.size_); + swap(a.data_, b.data_); +} + +MMapHandle::~MMapHandle() { UnMap(); } + +MMapHandle::MMapHandle(MMapHandle&& other) { swap(*this, other); } + +MMapHandle& MMapHandle::operator=(MMapHandle&& other) { + swap(*this, other); + return *this; +} + +bool MMapHandle::Map(const char* path) { + this->UnMap(); + + const int fd = open(path, O_RDONLY); + if (fd == -1) { + TFLITE_LOG_PROD(tflite::TFLITE_LOG_ERROR, + "Could not open file to mmap: %s (%s).", strerror(errno), + path) + return false; + } + + const ScopeGuard close_fd_on_return([&fd] { + if (fd >= 0) { + close(fd); + } + }); + + struct stat file_stats; + if (fstat(fd, &file_stats)) { + TFLITE_LOG_PROD(tflite::TFLITE_LOG_ERROR, + "Could not access file stats to get size: %s (%s).", + strerror(errno), path) + return false; + } + + size_ = file_stats.st_size; +#if defined(_MSC_VER) + data_ = new uint8_t[size_]; + { + uint8_t* data_reader = data_; + size_t remaining_bytes = size_; + while (remaining_bytes > 0) { + const auto bytes = read(fd, data_reader, remaining_bytes); + if (bytes == -1) { + TFLITE_LOG_PROD(tflite::TFLITE_LOG_ERROR, + "Could not read file ('%s'): %s.", path, + strerror(errno)) + UnMap(); + return false; + } + remaining_bytes -= bytes; + data_reader += bytes; + } + } +#else + data_ = static_cast( + mmap(/*addr=*/nullptr, size_, PROT_READ, MAP_SHARED, fd, /*offset=*/0)); + if (data_ == MAP_FAILED) { + TFLITE_LOG_PROD(tflite::TFLITE_LOG_ERROR, "Could not mmap file: %s (%s).", + strerror(errno), path) + data_ = nullptr; + size_ = 0; + return false; + } +#endif + + return true; +} + +void MMapHandle::UnMap() { + if (data_) { +#if defined(_MSC_VER) + delete[] data_; +#else + munmap(data_, size_); +#endif + data_ = nullptr; + size_ = 0; + } +} + +void* WeightCacheBuilder::Reserve(size_t size) { + size_t offset = buffer_data_.size(); + const size_t misalign = offset % kMinAlignment; + if (misalign) { + size += kMinAlignment - misalign; + offset += kMinAlignment - misalign; + } + buffer_data_.resize(buffer_data_.size() + size); + return buffer_data_.data() + offset; +} + +bool WeightCacheBuilder::SpanIsWithinBuffer(const void* ptr, + uint64_t 
size) const { + const uintptr_t buf_begin = reinterpret_cast(buffer_data_.data()); + const uintptr_t buf_end = buf_begin + buffer_data_.size(); + const uintptr_t ptr_begin = reinterpret_cast(ptr); + const uintptr_t ptr_end = ptr_begin + size; + return ptr_begin >= buf_begin && ptr_begin <= buf_end && + ptr_end >= buf_begin && ptr_end <= buf_end; +} + +BufferLocation WeightCacheBuilder::Append(PackIdentifier pack_id, + const void* data, uint64_t size) { + const void* append_data = data; + if (!SpanIsWithinBuffer(data, size)) { + void* reserved_data = Reserve(size); + std::memcpy(reserved_data, data, size); + append_data = reserved_data; + } + BufferLocation loc{.offset = reinterpret_cast(append_data) - + reinterpret_cast(buffer_data_.data()), + .size = size}; + schema_.buffers.push_back(std::make_unique( + cache::schema::BufferT{.packing_algorithm_id = pack_id.pack_algorithm_id, + .weights_id = pack_id.weights_id, + .bias_id = pack_id.bias_id, + .offset = loc.offset, + .size = loc.size})); + return loc; +} + +bool WeightCacheBuilder::ShouldWrite() const { return !buffer_data_.empty(); } + +namespace { + +bool WriteData(const int fd, const uint8_t* data, size_t size, + const char* const file_path, const char* step_description) { + for (size_t bytes = 0; bytes < size;) { + const auto written_bytes = write(fd, data + bytes, size - bytes); + if (written_bytes == -1) { + TFLITE_LOG_PROD(tflite::TFLITE_LOG_ERROR, + "Cache file write incomplete (%s). %s: %s", file_path, + step_description, strerror(errno)) + } + bytes += written_bytes; + } + + return true; +} + +} // namespace + +bool WeightCacheBuilder::Write(const char* path) { + const int fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0644); + if (fd == -1) { + TFLITE_LOG_PROD(tflite::TFLITE_LOG_ERROR, + "Could not open cache file ('%s') for writing: %s", path, + strerror(errno)) + return false; + } + + const ScopeGuard close_fd_on_return([&fd] { + if (fd >= 0) { + close(fd); + } + }); + + flatbuffers::FlatBufferBuilder builder; + // Add a fake size and the base offset to mutate them afterwards. Otherwise + // space for it won't be added to the flatbuffer. + schema_.flatbuffer_size = 1; + schema_.base_offset = 1; + FinishPackedWeightsBuffer( + builder, cache::schema::PackedWeights::Pack(builder, &schema_)); + + // Mutate the flatbuffer size and base offset fields. + auto* mutable_packed_weights = + cache::schema::GetMutablePackedWeights(builder.GetBufferPointer()); + mutable_packed_weights->mutate_flatbuffer_size(builder.GetSize()); + const size_t misalign = builder.GetSize() % kMinAlignment; + const size_t alignment_offset = misalign ? kMinAlignment - misalign : 0; + mutable_packed_weights->mutate_base_offset(builder.GetSize() + + alignment_offset); + + // Write the flatbuffer which serves as a header to index the following data. + if (!WriteData(fd, builder.GetBufferPointer(), builder.GetSize(), path, + "Header")) { + return false; + } + // Add some padding so that the cache file can be mmaped and the buffers + // stay aligned correctly. + const uint8_t fill[kMinAlignment] = {0}; + if (!WriteData(fd, fill, alignment_offset, path, "Alignment padding")) { + return false; + } + // Write the actual buffer data. 
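+  // Resulting file layout: [flatbuffer header | zero padding up to a
+  // 64-byte boundary | raw buffer data]; once mmapped, each buffer lives at
+  // base_offset plus its recorded offset.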
+  if (!WriteData(fd, buffer_data_.data(), buffer_data_.size(), path,
+                 "Buffer data")) {
+    return false;
+  }
+  return true;
+}
+
+MMapWeightCacheProvider::MMapWeightCacheProvider(
+    MMapWeightCacheProvider&& other) {
+  *this = std::move(other);
+}
+
+MMapWeightCacheProvider& MMapWeightCacheProvider::operator=(
+    MMapWeightCacheProvider&& other) {
+  using std::swap;
+  swap(cache_provider_, other.cache_provider_);
+  // The contexts need to keep pointing to their owning object.
+  cache_provider_.context = this;
+  other.cache_provider_.context = &other;
+  swap(file_path_, other.file_path_);
+  swap(buffer_address_to_identifier_, other.buffer_address_to_identifier_);
+  swap(cache_key_to_offset_, other.cache_key_to_offset_);
+  swap(mmap_handle_, other.mmap_handle_);
+  swap(mmap_buffer_base_offset_, other.mmap_buffer_base_offset_);
+  swap(builder_, other.builder_);
+  return *this;
+}
+
+void MMapWeightCacheProvider::SetFilePath(const char* path) {
+  XNNPACK_ABORT_CHECK(
+      !IsFinalized(),
+      "Cannot change the path of a cache that has already been loaded.");
+  file_path_ = path;
+}
+
+bool MMapWeightCacheProvider::Load(const std::string& path) {
+  file_path_ = path;
+  if (mmap_handle_.Map(path.c_str())) {
+    return Load(std::move(mmap_handle_));
+  }
+  return false;
+}
+
+bool MMapWeightCacheProvider::Load(MMapHandle&& handle) {
+  swap(mmap_handle_, handle);
+  // Verify the flatbuffer part of the file.
+  const size_t verifier_size =
+      std::min(mmap_handle_.size(),
+               static_cast<size_t>(FLATBUFFERS_MAX_BUFFER_SIZE - 1));
+  flatbuffers::Verifier verifier(mmap_handle_.data(), verifier_size);
+  if (!cache::schema::VerifyPackedWeightsBuffer(verifier)) {
+    TFLITE_LOG_PROD(tflite::TFLITE_LOG_ERROR,
+                    "Packed weights buffer validation failed.");
+    return false;
+  }
+
+  // Load flatbuffer.
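+  // (Note that the verifier above only validates the flatbuffer header; the
+  // raw buffer data appended after base_offset is not checked.)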
+ const cache::schema::PackedWeights* packed_weights = + cache::schema::GetPackedWeights(mmap_handle_.data()); + if (!packed_weights) { + TFLITE_LOG_PROD(tflite::TFLITE_LOG_ERROR, + "Could not get packed weights from flatbuffer."); + return false; + } + mmap_buffer_base_offset_ = packed_weights->base_offset(); + if (const auto buffers = packed_weights->buffers(); buffers) { + for (auto* buffer : *buffers) { + if (!buffer) { + TFLITE_LOG_PROD(tflite::TFLITE_LOG_ERROR, + "Invalid buffer address in buffer list."); + return false; + } + cache_key_to_offset_.emplace( + PackIdentifier{.pack_algorithm_id = buffer->packing_algorithm_id(), + .weights_id = buffer->weights_id(), + .bias_id = buffer->bias_id()}, + BufferLocation{.offset = buffer->offset(), .size = buffer->size()}); + } + } + return true; +} + +void MMapWeightCacheProvider::MapTensorIdentifiers( + const TfLiteTensor* tensors, const size_t size, + const std::unordered_map& tensor_index_to_identifier) { + for (const auto [index, identifier] : tensor_index_to_identifier) { + XNNPACK_ABORT_CHECK(index < size, + "Tensor index corresponds to a non existing tensor."); + buffer_address_to_identifier_[tensors[index].data.data] = identifier; + } +} + +size_t MMapWeightCacheProvider::LookUp( + const xnn_weights_cache_look_up_key* cache_key) { + if (!cache_key) { + TFLITE_LOG_PROD(tflite::TFLITE_LOG_ERROR, "A null cache key was provided."); + return SIZE_MAX; + } + const PackIdentifier pack_id = BuildPackIdentifier(*cache_key); + if (auto offset_it = cache_key_to_offset_.find(pack_id); + offset_it != cache_key_to_offset_.end()) { + return offset_it->second.offset; + } + return SIZE_MAX; +} + +void* MMapWeightCacheProvider::ReserveSpace(size_t size) { + XNNPACK_ABORT_CHECK(!IsFinalized(), + "Cannot reserve space in a finalized cache."); + return builder_.Reserve(size); +} + +size_t MMapWeightCacheProvider::LookUpOrInsert( + const xnn_weights_cache_look_up_key* cache_key, void* ptr, size_t size) { + XNNPACK_ABORT_CHECK(cache_key, "A null cache key was provided."); + + const PackIdentifier pack_id = BuildPackIdentifier(*cache_key); + if (auto offset_it = cache_key_to_offset_.find(pack_id); + offset_it != cache_key_to_offset_.end()) { + return offset_it->second.offset; + } + + XNNPACK_ABORT_CHECK(!IsFinalized(), + "Cannot insert a buffer in a finalized cache."); + + const BufferLocation location = builder_.Append(pack_id, ptr, size); + cache_key_to_offset_.emplace(pack_id, location); + return location.offset; +} + +void* MMapWeightCacheProvider::OffsetToAddr(const size_t offset) { + // While the cache is being built, the buffer could grow and need to be + // reallocated so we cannot ensure pointer stability. + XNNPACK_ABORT_CHECK( + IsFinalized(), + "Cannot get the address of a buffer in a non finalized cache."); + return mmap_handle_.data() + mmap_buffer_base_offset_ + offset; +} + +void MMapWeightCacheProvider::Reset() { + MMapWeightCacheProvider empty; + std::swap(*this, empty); +} + +bool MMapWeightCacheProvider::Finalize() { + if (IsFinalized()) { + return true; + } + if (file_path_.empty()) { + TFLITE_LOG_PROD(tflite::TFLITE_LOG_ERROR, + "File path wasn't set. Cannot finalize the cache."); + return false; + } + std::string file_path = file_path_; + if (!builder_.Write(file_path.c_str())) { + return false; + } + // The buffer mapping needs to be kept. We save it and restore it after the + // Reset. 
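+  // Finalize() therefore amounts to: flush the builder to disk, reset the
+  // in-memory state, and mmap the file back so that OffsetToAddr returns
+  // stable addresses.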
+ std::unordered_map + buffer_address_to_identifier_backup = + std::move(buffer_address_to_identifier_); + Reset(); + buffer_address_to_identifier_ = + std::move(buffer_address_to_identifier_backup); + return Load(file_path); +} + +bool MMapWeightCacheProvider::IsFinalized() const { + return mmap_handle_.IsMapped(); +} + +size_t MMapWeightCacheProvider::look_up( + void* context, const xnn_weights_cache_look_up_key* cache_key) { + return reinterpret_cast(context)->LookUp(cache_key); +} + +void* MMapWeightCacheProvider::reserve_space(void* context, size_t n) { + return reinterpret_cast(context)->ReserveSpace(n); +} + +size_t MMapWeightCacheProvider::look_up_or_insert( + void* context, const xnn_weights_cache_look_up_key* cache_key, void* ptr, + size_t size) { + return reinterpret_cast(context)->LookUpOrInsert( + cache_key, ptr, size); +} + +bool MMapWeightCacheProvider::is_finalized(void* context) { + return reinterpret_cast(context)->IsFinalized(); +} + +void* MMapWeightCacheProvider::offset_to_addr(void* context, size_t offset) { + return reinterpret_cast(context)->OffsetToAddr( + offset); +} + +enum xnn_status MMapWeightCacheProvider::delete_cache(void* context) { + reinterpret_cast(context)->Reset(); + return xnn_status_success; +} + +PackIdentifier MMapWeightCacheProvider::BuildPackIdentifier( + const xnn_weights_cache_look_up_key& key) { + const auto get_buffer_id = [&](const void* buffer) -> size_t { + if (buffer) { + const auto identifier_it = buffer_address_to_identifier_.find(buffer); + XNNPACK_ABORT_CHECK(identifier_it != buffer_address_to_identifier_.end(), + "Unknown constant buffer passed to HashCacheKey."); + return identifier_it->second; + } + return PackIdentifier::kNoId; + }; + return PackIdentifier{.pack_algorithm_id = key.seed, + .weights_id = get_buffer_id(key.kernel), + .bias_id = get_buffer_id(key.bias)}; +} + +} // namespace tflite::xnnpack diff --git a/tensorflow/lite/delegates/xnnpack/weight_cache.h b/tensorflow/lite/delegates/xnnpack/weight_cache.h new file mode 100644 index 00000000000000..8942ea3beb3e9e --- /dev/null +++ b/tensorflow/lite/delegates/xnnpack/weight_cache.h @@ -0,0 +1,306 @@ +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_DELEGATES_XNNPACK_WEIGHT_CACHE_H_ +#define TENSORFLOW_LITE_DELEGATES_XNNPACK_WEIGHT_CACHE_H_ + +#include +#include +#include +#include +#include +#include + +#include "xnnpack.h" // from @XNNPACK +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/delegates/xnnpack/weight_cache_schema_generated.h" + +// WARNING: the interface in this file is still under experimentation and WILL +// CHANGE. Do not rely on it. + +// TFLite doesn't use absl hashing utilities. 
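+// PackIdentifier::Hash below therefore combines the three ids with plain
+// std::hash and XOR; note that XOR is commutative, so identifiers whose
+// field values are permutations of each other hash alike.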
+ +namespace tflite { +namespace xnnpack { + +struct PackIdentifier { + enum { kNoId = SIZE_MAX }; + uint64_t pack_algorithm_id = kNoId; + uint64_t weights_id = kNoId; + uint64_t bias_id = kNoId; + + friend bool operator==(const PackIdentifier& a, const PackIdentifier& b) { + return a.pack_algorithm_id == b.pack_algorithm_id && + a.weights_id == b.weights_id && a.bias_id == b.bias_id; + } + + struct Hash { + size_t operator()(const PackIdentifier& p) const { + std::hash hasher; + return hasher(p.pack_algorithm_id) ^ hasher(p.weights_id) ^ + hasher(p.bias_id); + } + }; +}; + +struct BufferLocation { + uint64_t offset; + uint64_t size; +}; + +// Handles MMap allocations lifetime. +// +// When mapped, provides a view over the allocation for convenience. +// +// WARNING: the interface in this file is still under experimentation and WILL +// CHANGE. Do not rely on it. +class MMapHandle { + public: + using value_type = uint8_t; + + MMapHandle() = default; + ~MMapHandle(); + MMapHandle(const MMapHandle&) = delete; + MMapHandle& operator=(const MMapHandle&) = delete; + MMapHandle(MMapHandle&&); + MMapHandle& operator=(MMapHandle&&); + + // Maps the file at the given path. + [[nodiscard /*Mapping a file can fail.*/]] + bool Map(const char* path); + + // Unmaps an existing mapping. + void UnMap(); + + // Returns true if a mapping exists. + bool IsMapped() const { return data_ != nullptr; } + + // Returns the mapping buffer. + uint8_t* data() { return data_; } + + // Returns the mapping buffer. + const uint8_t* data() const { return data_; } + + // Returns the mapping size in bytes. + size_t size() const { return size_; } + + uint8_t* begin() { return data(); } + + const uint8_t* begin() const { return data(); } + + uint8_t* end() { return data() + size(); } + + const uint8_t* end() const { return data() + size(); } + + friend void swap(MMapHandle& a, MMapHandle& b); + + private: + size_t size_ = 0; + uint8_t* data_ = nullptr; +}; + +// Provides storage to write the packed buffers to and saves those to disk. +// +// WARNING: the interface in this file is still under experimentation and WILL +// CHANGE. Do not rely on it. +class WeightCacheBuilder { + public: + // Reserves space in the data buffer for the required size in bytes and + // returns the address of that space. + // + // Sets `last_reserve` to the offset from `buffer_data_`'s start and `n`. + // + // A call to `Reserve` should alway be followed by a call to `Append`. + [[nodiscard /*The pointer to reserved space should be used.*/]] + void* Reserve(size_t size); + + // Adds a buffer to the cache. + // + // The buffer space must have been reserved before using `Reserve`. If not, a + // new call to `Reserve` will be done and the data will be copied over. + [[nodiscard /*The location to the appended data should be saved.*/]] + BufferLocation Append(PackIdentifier pack_id, const void* data, + uint64_t size); + + // Checks whether this builder has data that needs to be written to disk. + bool ShouldWrite() const; + + // Writes the flatbuffer to disk. + [[nodiscard /*Writing the weight cache can fail.*/]] + bool Write(const char* path); + + // Helper for testing. + // + // WARNING: this exposes class implementation details for testing purposes and + // may be removed at any time. 
+ const std::vector& BufferData() const { return buffer_data_; } + + private: + bool SpanIsWithinBuffer(const void* ptr, uint64_t size) const; + + cache::schema::PackedWeightsT schema_; + std::vector buffer_data_; +}; + +// Allows XNNPack to directly load packed weights from disk instead of having to +// repack them every time. +// +// XNNPack kernels do not have knowledge of the TFLite context. The only thing +// they can access is the buffers address. We rely on the fact that the address +// provided by TFLite is unique in order to find out the buffer identifier. +// +// To use the cache you need to: +// +// - Map the buffer addresses to their identifier with `MapTensorIdentifiers` +// - Load the cache file. +// - Finalize the cache before calling the run functions of XNNPack (setup and +// reshape are ok). +class MMapWeightCacheProvider { + public: + MMapWeightCacheProvider() = default; + MMapWeightCacheProvider(const MMapWeightCacheProvider&) = delete; + MMapWeightCacheProvider& operator=(const MMapWeightCacheProvider&) = delete; + MMapWeightCacheProvider(MMapWeightCacheProvider&&); + MMapWeightCacheProvider& operator=(MMapWeightCacheProvider&&); + + // Changes the file path to save the cache to. + // + // WARNING: Can only be called if the cache isn't finalized. + void SetFilePath(const char* file_path); + + // Loads a flatbuffer following the layout in weight_cache_schema.fbs and set + // the file path. + [[nodiscard /*Loading a cache file may fail.*/]] + bool Load(const std::string& path); + + // Loads an MMap allocation following the layout in weight_cache_schema.fbs. + [[nodiscard /*Loading cache data may fail.*/]] + bool Load(MMapHandle&& mmap_handle); + + // Creates the tensor map. + void MapTensorIdentifiers( + const TfLiteTensor* tensors, size_t size, + const std::unordered_map& tensor_index_to_identifier); + + // Returns the offset of the buffer identified by `cache_key`. + // + // If the buffer isn't found, return SIZE_MAX. + [[nodiscard]] + size_t LookUp(const xnn_weights_cache_look_up_key* cache_key); + + // Reserves space for a buffer of given size and returns a pointer to it. + // + // The buffer data should be filled and `LookUpOrInsert` should be immediately + // called. + [[nodiscard]] + void* ReserveSpace(size_t size); + + // Returns the offset of the buffer identified by `cache_key`. If the lookup + // fails, inserts the span `[ptr, ptr+size)`. + // + // This should be called after ReserveSpace and `ptr` should be the result of + // that call with the given `size`. + // + // WARNING: The cache key cannot be null. + [[nodiscard]] + size_t LookUpOrInsert(const xnn_weights_cache_look_up_key* cache_key, + void* ptr, size_t size); + + // Gets the pointer to the buffer at the given offset. + // + // WARNING: This requires the buffer to be finalized. + // WARNING: This does not check the validity of the passed offset. + void* OffsetToAddr(size_t offset); + + // Resets the weight cache provider as if it had been default constructed. + void Reset(); + + // Ensures that the cache is ready. + // + // If the cache file already exists, this is a no-op. Otherwise, this writes + // the file to disk and reloads it. + [[nodiscard /*Writing the cache file may fail.*/]] + bool Finalize(); + + // Checks whether the cache is ready to be used. + bool IsFinalized() const; + + // Returns true if any weights have been added to the underlying builder. + bool IsBuilding() const { return !IsFinalized() && !file_path_.empty(); }; + + // Returns true if a file is mapped or a file path is set. 
+  bool IsActive() const { return IsFinalized() || !file_path_.empty(); };
+
+  // Returns the cache provider expected by XNNPack.
+  xnn_weights_cache_provider& GetCacheProvider() { return cache_provider_; }
+
+  // C interface: `xnn_weights_cache_provider` callback.
+  static size_t look_up(void* context,
+                        const xnn_weights_cache_look_up_key* cache_key);
+
+  // C interface: `xnn_weights_cache_provider` callback.
+  static void* reserve_space(void* context, size_t n);
+
+  // C interface: `xnn_weights_cache_provider` callback.
+  static size_t look_up_or_insert(
+      void* context, const xnn_weights_cache_look_up_key* cache_key, void* ptr,
+      size_t size);
+
+  // C interface: `xnn_weights_cache_provider` callback.
+  static bool is_finalized(void* context);
+
+  // C interface: `xnn_weights_cache_provider` callback.
+  static void* offset_to_addr(void* context, size_t offset);
+
+  // C interface: `xnn_weights_cache_provider` callback.
+  static enum xnn_status delete_cache(void* context);
+
+ private:
+  // Hashes a cache key for lookup in `cache_key_to_offset_`.
+  PackIdentifier BuildPackIdentifier(const xnn_weights_cache_look_up_key& key);
+
+  // Cache provider implementation for XNNPack.
+  xnn_weights_cache_provider cache_provider_{
+      .context = this,
+      .look_up = MMapWeightCacheProvider::look_up,
+      .reserve_space = MMapWeightCacheProvider::reserve_space,
+      .look_up_or_insert = MMapWeightCacheProvider::look_up_or_insert,
+      .is_finalized = MMapWeightCacheProvider::is_finalized,
+      .offset_to_addr = MMapWeightCacheProvider::offset_to_addr,
+      .delete_cache = MMapWeightCacheProvider::delete_cache};
+
+  // Path to the cache file.
+  std::string file_path_;
+
+  // Maps buffer addresses to buffer identifiers.
+  std::unordered_map<const void*, uint64_t> buffer_address_to_identifier_;
+
+  // Maps pack identifiers to the locations of the cached buffers.
+  std::unordered_multimap<PackIdentifier, BufferLocation, PackIdentifier::Hash>
+      cache_key_to_offset_;
+
+  // MMap allocation handler.
+  MMapHandle mmap_handle_;
+
+  // The offset to the first buffer data in the MMap allocation.
+  size_t mmap_buffer_base_offset_;
+
+  // Used to build the cache.
+  WeightCacheBuilder builder_;
+};
+
+}  // namespace xnnpack
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_DELEGATES_XNNPACK_WEIGHT_CACHE_H_
diff --git a/tensorflow/lite/delegates/xnnpack/weight_cache_schema.fbs b/tensorflow/lite/delegates/xnnpack/weight_cache_schema.fbs
new file mode 100644
index 00000000000000..0658054f21c07e
--- /dev/null
+++ b/tensorflow/lite/delegates/xnnpack/weight_cache_schema.fbs
@@ -0,0 +1,52 @@
+// Copyright 2024 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This is a list of buffers with identifiers, to host the CPU-specific cache on disk.
+namespace tflite.xnnpack.cache.schema;
+
+// Schema version.
+file_identifier "V001";
+// File extension of written files.
+file_extension "xnn_weights";
+
+table Buffer {
+  // To uniquely identify a packed buffer we need to keep track of the packing
+  // algorithm and of the buffers that were used to generate it.
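+  // For example, a kernel packed with (hypothetical) algorithm 42 from
+  // weights buffer 7 and bias buffer 8 would be identified by the triplet
+  // (packing_algorithm_id=42, weights_id=7, bias_id=8).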
+  packing_algorithm_id: uint64;
+  weights_id: uint64;
+  bias_id: uint64;
+
+  /// The buffer data is appended after the flatbuffer to bypass the 2GB file
+  /// size limitation. The offset is calculated relative to the base offset
+  /// (i.e. beginning of the file + base_offset).
+  offset: uint64;
+
+  /// Size of the buffer in bytes.
+  size: uint64;
+}
+
+table PackedWeights {
+  /// A list of buffers.
+  buffers: [Buffer];
+
+  /// The first `flatbuffer_size` bytes of the serialized file hold the
+  /// `PackedWeights` table; a blob with the buffer contents is appended after it.
+  flatbuffer_size: uint64;
+
+  /// Defines the base offset for the data appended to the file. That offset
+  /// may be needed to guarantee data alignment.
+  base_offset: uint64;
+}
+
+root_type PackedWeights;
diff --git a/tensorflow/lite/delegates/xnnpack/weight_cache_schema_generated.h b/tensorflow/lite/delegates/xnnpack/weight_cache_schema_generated.h
new file mode 100755
index 00000000000000..fa5d30a4cdae65
--- /dev/null
+++ b/tensorflow/lite/delegates/xnnpack/weight_cache_schema_generated.h
@@ -0,0 +1,422 @@
+/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+// automatically generated by the FlatBuffers compiler, do not modify
+
+
+#ifndef FLATBUFFERS_GENERATED_WEIGHTCACHESCHEMA_TFLITE_XNNPACK_CACHE_SCHEMA_H_
+#define FLATBUFFERS_GENERATED_WEIGHTCACHESCHEMA_TFLITE_XNNPACK_CACHE_SCHEMA_H_
+
+#include "flatbuffers/flatbuffers.h"
+
+// Ensure the included flatbuffers.h is the same version as when this file was
+// generated, otherwise it may not be compatible.
+static_assert(FLATBUFFERS_VERSION_MAJOR == 24 && + FLATBUFFERS_VERSION_MINOR == 3 && + FLATBUFFERS_VERSION_REVISION == 25, + "Non-compatible flatbuffers version included"); + +namespace tflite { +namespace xnnpack { +namespace cache { +namespace schema { + +struct Buffer; +struct BufferBuilder; +struct BufferT; + +struct PackedWeights; +struct PackedWeightsBuilder; +struct PackedWeightsT; + +struct BufferT : public ::flatbuffers::NativeTable { + typedef Buffer TableType; + uint64_t packing_algorithm_id = 0; + uint64_t weights_id = 0; + uint64_t bias_id = 0; + uint64_t offset = 0; + uint64_t size = 0; +}; + +struct Buffer FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef BufferT NativeTableType; + typedef BufferBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_PACKING_ALGORITHM_ID = 4, + VT_WEIGHTS_ID = 6, + VT_BIAS_ID = 8, + VT_OFFSET = 10, + VT_SIZE = 12 + }; + uint64_t packing_algorithm_id() const { + return GetField(VT_PACKING_ALGORITHM_ID, 0); + } + bool mutate_packing_algorithm_id(uint64_t _packing_algorithm_id = 0) { + return SetField(VT_PACKING_ALGORITHM_ID, _packing_algorithm_id, 0); + } + uint64_t weights_id() const { + return GetField(VT_WEIGHTS_ID, 0); + } + bool mutate_weights_id(uint64_t _weights_id = 0) { + return SetField(VT_WEIGHTS_ID, _weights_id, 0); + } + uint64_t bias_id() const { + return GetField(VT_BIAS_ID, 0); + } + bool mutate_bias_id(uint64_t _bias_id = 0) { + return SetField(VT_BIAS_ID, _bias_id, 0); + } + /// The buffer data is appended after the flatbuffer to bypass 2GB file size + /// limitation. The offset is calculated relative to the base offset. + /// (i.e. beginning of the file + base_offset). + uint64_t offset() const { + return GetField(VT_OFFSET, 0); + } + bool mutate_offset(uint64_t _offset = 0) { + return SetField(VT_OFFSET, _offset, 0); + } + /// Size of the buffer in bytes. 
+ uint64_t size() const { + return GetField(VT_SIZE, 0); + } + bool mutate_size(uint64_t _size = 0) { + return SetField(VT_SIZE, _size, 0); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_PACKING_ALGORITHM_ID, 8) && + VerifyField(verifier, VT_WEIGHTS_ID, 8) && + VerifyField(verifier, VT_BIAS_ID, 8) && + VerifyField(verifier, VT_OFFSET, 8) && + VerifyField(verifier, VT_SIZE, 8) && + verifier.EndTable(); + } + BufferT *UnPack(const ::flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(BufferT *_o, const ::flatbuffers::resolver_function_t *_resolver = nullptr) const; + static ::flatbuffers::Offset Pack(::flatbuffers::FlatBufferBuilder &_fbb, const BufferT* _o, const ::flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct BufferBuilder { + typedef Buffer Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_packing_algorithm_id(uint64_t packing_algorithm_id) { + fbb_.AddElement(Buffer::VT_PACKING_ALGORITHM_ID, packing_algorithm_id, 0); + } + void add_weights_id(uint64_t weights_id) { + fbb_.AddElement(Buffer::VT_WEIGHTS_ID, weights_id, 0); + } + void add_bias_id(uint64_t bias_id) { + fbb_.AddElement(Buffer::VT_BIAS_ID, bias_id, 0); + } + void add_offset(uint64_t offset) { + fbb_.AddElement(Buffer::VT_OFFSET, offset, 0); + } + void add_size(uint64_t size) { + fbb_.AddElement(Buffer::VT_SIZE, size, 0); + } + explicit BufferBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateBuffer( + ::flatbuffers::FlatBufferBuilder &_fbb, + uint64_t packing_algorithm_id = 0, + uint64_t weights_id = 0, + uint64_t bias_id = 0, + uint64_t offset = 0, + uint64_t size = 0) { + BufferBuilder builder_(_fbb); + builder_.add_size(size); + builder_.add_offset(offset); + builder_.add_bias_id(bias_id); + builder_.add_weights_id(weights_id); + builder_.add_packing_algorithm_id(packing_algorithm_id); + return builder_.Finish(); +} + +::flatbuffers::Offset CreateBuffer(::flatbuffers::FlatBufferBuilder &_fbb, const BufferT *_o, const ::flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct PackedWeightsT : public ::flatbuffers::NativeTable { + typedef PackedWeights TableType; + std::vector> buffers{}; + uint64_t flatbuffer_size = 0; + uint64_t base_offset = 0; + PackedWeightsT() = default; + PackedWeightsT(const PackedWeightsT &o); + PackedWeightsT(PackedWeightsT&&) FLATBUFFERS_NOEXCEPT = default; + PackedWeightsT &operator=(PackedWeightsT o) FLATBUFFERS_NOEXCEPT; +}; + +struct PackedWeights FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef PackedWeightsT NativeTableType; + typedef PackedWeightsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_BUFFERS = 4, + VT_FLATBUFFER_SIZE = 6, + VT_BASE_OFFSET = 8 + }; + /// A list of buffers. + const ::flatbuffers::Vector<::flatbuffers::Offset> *buffers() const { + return GetPointer> *>(VT_BUFFERS); + } + ::flatbuffers::Vector<::flatbuffers::Offset> *mutable_buffers() { + return GetPointer<::flatbuffers::Vector<::flatbuffers::Offset> *>(VT_BUFFERS); + } + /// The serialized file is `flatbuffer_size` of bytes representing + /// `NamedBuffers` appended with a blob representing the buffer content. 
+ uint64_t flatbuffer_size() const { + return GetField(VT_FLATBUFFER_SIZE, 0); + } + bool mutate_flatbuffer_size(uint64_t _flatbuffer_size = 0) { + return SetField(VT_FLATBUFFER_SIZE, _flatbuffer_size, 0); + } + /// Defines the base offset for the data appended to the file. That offset + /// may be needed to guarantee data alignment. + uint64_t base_offset() const { + return GetField(VT_BASE_OFFSET, 0); + } + bool mutate_base_offset(uint64_t _base_offset = 0) { + return SetField(VT_BASE_OFFSET, _base_offset, 0); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_BUFFERS) && + verifier.VerifyVector(buffers()) && + verifier.VerifyVectorOfTables(buffers()) && + VerifyField(verifier, VT_FLATBUFFER_SIZE, 8) && + VerifyField(verifier, VT_BASE_OFFSET, 8) && + verifier.EndTable(); + } + PackedWeightsT *UnPack(const ::flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(PackedWeightsT *_o, const ::flatbuffers::resolver_function_t *_resolver = nullptr) const; + static ::flatbuffers::Offset Pack(::flatbuffers::FlatBufferBuilder &_fbb, const PackedWeightsT* _o, const ::flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct PackedWeightsBuilder { + typedef PackedWeights Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_buffers(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> buffers) { + fbb_.AddOffset(PackedWeights::VT_BUFFERS, buffers); + } + void add_flatbuffer_size(uint64_t flatbuffer_size) { + fbb_.AddElement(PackedWeights::VT_FLATBUFFER_SIZE, flatbuffer_size, 0); + } + void add_base_offset(uint64_t base_offset) { + fbb_.AddElement(PackedWeights::VT_BASE_OFFSET, base_offset, 0); + } + explicit PackedWeightsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreatePackedWeights( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> buffers = 0, + uint64_t flatbuffer_size = 0, + uint64_t base_offset = 0) { + PackedWeightsBuilder builder_(_fbb); + builder_.add_base_offset(base_offset); + builder_.add_flatbuffer_size(flatbuffer_size); + builder_.add_buffers(buffers); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreatePackedWeightsDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + const std::vector<::flatbuffers::Offset> *buffers = nullptr, + uint64_t flatbuffer_size = 0, + uint64_t base_offset = 0) { + auto buffers__ = buffers ? 
_fbb.CreateVector<::flatbuffers::Offset>(*buffers) : 0; + return tflite::xnnpack::cache::schema::CreatePackedWeights( + _fbb, + buffers__, + flatbuffer_size, + base_offset); +} + +::flatbuffers::Offset CreatePackedWeights(::flatbuffers::FlatBufferBuilder &_fbb, const PackedWeightsT *_o, const ::flatbuffers::rehasher_function_t *_rehasher = nullptr); + +inline BufferT *Buffer::UnPack(const ::flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new BufferT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void Buffer::UnPackTo(BufferT *_o, const ::flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = packing_algorithm_id(); _o->packing_algorithm_id = _e; } + { auto _e = weights_id(); _o->weights_id = _e; } + { auto _e = bias_id(); _o->bias_id = _e; } + { auto _e = offset(); _o->offset = _e; } + { auto _e = size(); _o->size = _e; } +} + +inline ::flatbuffers::Offset Buffer::Pack(::flatbuffers::FlatBufferBuilder &_fbb, const BufferT* _o, const ::flatbuffers::rehasher_function_t *_rehasher) { + return CreateBuffer(_fbb, _o, _rehasher); +} + +inline ::flatbuffers::Offset CreateBuffer(::flatbuffers::FlatBufferBuilder &_fbb, const BufferT *_o, const ::flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { ::flatbuffers::FlatBufferBuilder *__fbb; const BufferT* __o; const ::flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _packing_algorithm_id = _o->packing_algorithm_id; + auto _weights_id = _o->weights_id; + auto _bias_id = _o->bias_id; + auto _offset = _o->offset; + auto _size = _o->size; + return tflite::xnnpack::cache::schema::CreateBuffer( + _fbb, + _packing_algorithm_id, + _weights_id, + _bias_id, + _offset, + _size); +} + +inline PackedWeightsT::PackedWeightsT(const PackedWeightsT &o) + : flatbuffer_size(o.flatbuffer_size), + base_offset(o.base_offset) { + buffers.reserve(o.buffers.size()); + for (const auto &buffers_ : o.buffers) { buffers.emplace_back((buffers_) ? 
new tflite::xnnpack::cache::schema::BufferT(*buffers_) : nullptr); } +} + +inline PackedWeightsT &PackedWeightsT::operator=(PackedWeightsT o) FLATBUFFERS_NOEXCEPT { + std::swap(buffers, o.buffers); + std::swap(flatbuffer_size, o.flatbuffer_size); + std::swap(base_offset, o.base_offset); + return *this; +} + +inline PackedWeightsT *PackedWeights::UnPack(const ::flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new PackedWeightsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void PackedWeights::UnPackTo(PackedWeightsT *_o, const ::flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = buffers(); if (_e) { _o->buffers.resize(_e->size()); for (::flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { if(_o->buffers[_i]) { _e->Get(_i)->UnPackTo(_o->buffers[_i].get(), _resolver); } else { _o->buffers[_i] = std::unique_ptr(_e->Get(_i)->UnPack(_resolver)); }; } } else { _o->buffers.resize(0); } } + { auto _e = flatbuffer_size(); _o->flatbuffer_size = _e; } + { auto _e = base_offset(); _o->base_offset = _e; } +} + +inline ::flatbuffers::Offset PackedWeights::Pack(::flatbuffers::FlatBufferBuilder &_fbb, const PackedWeightsT* _o, const ::flatbuffers::rehasher_function_t *_rehasher) { + return CreatePackedWeights(_fbb, _o, _rehasher); +} + +inline ::flatbuffers::Offset CreatePackedWeights(::flatbuffers::FlatBufferBuilder &_fbb, const PackedWeightsT *_o, const ::flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { ::flatbuffers::FlatBufferBuilder *__fbb; const PackedWeightsT* __o; const ::flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _buffers = _o->buffers.size() ? 
_fbb.CreateVector<::flatbuffers::Offset> (_o->buffers.size(), [](size_t i, _VectorArgs *__va) { return CreateBuffer(*__va->__fbb, __va->__o->buffers[i].get(), __va->__rehasher); }, &_va ) : 0; + auto _flatbuffer_size = _o->flatbuffer_size; + auto _base_offset = _o->base_offset; + return tflite::xnnpack::cache::schema::CreatePackedWeights( + _fbb, + _buffers, + _flatbuffer_size, + _base_offset); +} + +inline const tflite::xnnpack::cache::schema::PackedWeights *GetPackedWeights(const void *buf) { + return ::flatbuffers::GetRoot(buf); +} + +inline const tflite::xnnpack::cache::schema::PackedWeights *GetSizePrefixedPackedWeights(const void *buf) { + return ::flatbuffers::GetSizePrefixedRoot(buf); +} + +inline PackedWeights *GetMutablePackedWeights(void *buf) { + return ::flatbuffers::GetMutableRoot(buf); +} + +inline tflite::xnnpack::cache::schema::PackedWeights *GetMutableSizePrefixedPackedWeights(void *buf) { + return ::flatbuffers::GetMutableSizePrefixedRoot(buf); +} + +inline const char *PackedWeightsIdentifier() { + return "V001"; +} + +inline bool PackedWeightsBufferHasIdentifier(const void *buf) { + return ::flatbuffers::BufferHasIdentifier( + buf, PackedWeightsIdentifier()); +} + +inline bool SizePrefixedPackedWeightsBufferHasIdentifier(const void *buf) { + return ::flatbuffers::BufferHasIdentifier( + buf, PackedWeightsIdentifier(), true); +} + +inline bool VerifyPackedWeightsBuffer( + ::flatbuffers::Verifier &verifier) { + return verifier.VerifyBuffer(PackedWeightsIdentifier()); +} + +inline bool VerifySizePrefixedPackedWeightsBuffer( + ::flatbuffers::Verifier &verifier) { + return verifier.VerifySizePrefixedBuffer(PackedWeightsIdentifier()); +} + +inline const char *PackedWeightsExtension() { + return "xnn_weights"; +} + +inline void FinishPackedWeightsBuffer( + ::flatbuffers::FlatBufferBuilder &fbb, + ::flatbuffers::Offset root) { + fbb.Finish(root, PackedWeightsIdentifier()); +} + +inline void FinishSizePrefixedPackedWeightsBuffer( + ::flatbuffers::FlatBufferBuilder &fbb, + ::flatbuffers::Offset root) { + fbb.FinishSizePrefixed(root, PackedWeightsIdentifier()); +} + +inline std::unique_ptr UnPackPackedWeights( + const void *buf, + const ::flatbuffers::resolver_function_t *res = nullptr) { + return std::unique_ptr(GetPackedWeights(buf)->UnPack(res)); +} + +inline std::unique_ptr UnPackSizePrefixedPackedWeights( + const void *buf, + const ::flatbuffers::resolver_function_t *res = nullptr) { + return std::unique_ptr(GetSizePrefixedPackedWeights(buf)->UnPack(res)); +} + +} // namespace schema +} // namespace cache +} // namespace xnnpack +} // namespace tflite + +#endif // FLATBUFFERS_GENERATED_WEIGHTCACHESCHEMA_TFLITE_XNNPACK_CACHE_SCHEMA_H_ diff --git a/tensorflow/lite/delegates/xnnpack/weight_cache_test.cc b/tensorflow/lite/delegates/xnnpack/weight_cache_test.cc new file mode 100644 index 00000000000000..a270c4f712918f --- /dev/null +++ b/tensorflow/lite/delegates/xnnpack/weight_cache_test.cc @@ -0,0 +1,725 @@ +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/delegates/xnnpack/weight_cache.h"
+
+#include <fcntl.h>
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <iterator>
+#include <ostream>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+#include "xnnpack.h"  // from @XNNPACK
+#include "flatbuffers/verifier.h"  // from @flatbuffers
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/delegates/xnnpack/weight_cache_schema_generated.h"
+
+namespace tflite::xnnpack {
+
+std::ostream& operator<<(std::ostream& os, const PackIdentifier& p) {
+  return os << "PackIdentifier{pack_algo: " << p.pack_algorithm_id
+            << ", weights_id: " << p.weights_id << ", bias_id: " << p.bias_id
+            << "}";
+}
+
+namespace {
+
+using testing::ElementsAreArray;
+using testing::Ge;
+
+#ifndef XNN_TEST_WEIGHT_CACHE_TEMP_FILE_TEMPLATE
+#define XNN_TEST_WEIGHT_CACHE_TEMP_FILE_TEMPLATE \
+  "/tmp/weight_cache_test_file.XXXXXX"
+#endif
+constexpr const char kTempFileTemplate[] =
+    XNN_TEST_WEIGHT_CACHE_TEMP_FILE_TEMPLATE;
+
+// Wraps a call to `mkstemp` to create temporary files.
+class TempFileDesc {
+ public:
+  static constexpr struct AutoClose {
+  } kAutoClose;
+
+#if defined(_MSC_VER)
+  TempFileDesc() : fd_() {
+    char filename[L_tmpnam_s];
+    errno_t err = tmpnam_s(filename, L_tmpnam_s);
+    if (err) {
+      fprintf(stderr, "Could not create temporary filename.\n");
+      std::abort();
+    }
+    path_ = filename;
+    fd_ = open(path_.c_str(), O_CREAT | O_EXCL | O_RDWR, 0644);
+    if (fd_ < 0) {
+      fprintf(stderr, "Could not create temporary file.\n");
+      std::abort();
+    }
+  }
+#else
+  TempFileDesc() : fd_(mkstemp(path_.data())) {
+    if (GetFd() < 0) {
+      perror("Could not create temporary file");
+    }
+  }
+#endif
+
+  explicit TempFileDesc(AutoClose) : TempFileDesc() { Close(); }
+
+  TempFileDesc(const TempFileDesc&) = delete;
+  TempFileDesc& operator=(const TempFileDesc&) = delete;
+
+  friend void swap(TempFileDesc& a, TempFileDesc& b) {
+    std::swap(a.path_, b.path_);
+    std::swap(a.fd_, b.fd_);
+  }
+
+  TempFileDesc(TempFileDesc&& other) { swap(*this, other); }
+  TempFileDesc& operator=(TempFileDesc&& other) {
+    swap(*this, other);
+    return *this;
+  }
+
+  ~TempFileDesc() { Close(); }
+
+  void Close() {
+    if (GetFd() >= 0) {
+      close(fd_);
+      fd_ = -1;
+    }
+  }
+
+  const std::string& GetPath() const { return path_; }
+
+  const char* GetCPath() const { return path_.c_str(); }
+
+  int GetFd() const { return fd_; }
+
+  bool IsOpen() const { return fd_ >= 0; }
+
+ private:
+  std::string path_ = kTempFileTemplate;
+  int fd_ = -1;
+};
+
+TEST(MMapHandleTest, DefaultConstructs) {
+  MMapHandle handle;
+  EXPECT_FALSE(handle.IsMapped());
+  EXPECT_EQ(handle.data(), nullptr);
+  EXPECT_EQ(handle.size(), 0);
+}
+
+TEST(MMapHandleTest, MapNonExistingFileFails) {
+  // I hope this path doesn't exist...
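+  // (The name below is deliberately junk so that mapping fails cleanly for a
+  // path that does not exist.)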
+ const char* file_path = "sdbgfd"; + MMapHandle handle; + EXPECT_FALSE(handle.Map(file_path)); +} + +TEST(MMapHandleTest, MapExistingFileWorks) { + using std::size; + + const std::string payload = "This is some data in the file."; + + TempFileDesc tmp_file; + ASSERT_TRUE(tmp_file.IsOpen()); + write(tmp_file.GetFd(), payload.c_str(), size(payload)); + tmp_file.Close(); + + MMapHandle handle; + ASSERT_TRUE(handle.Map(tmp_file.GetCPath())); + EXPECT_TRUE(handle.IsMapped()); + EXPECT_NE(handle.data(), nullptr); + EXPECT_THAT(handle.size(), Ge(size(payload))); + EXPECT_THAT(handle, ElementsAreArray(payload)); + + handle.UnMap(); + EXPECT_FALSE(handle.IsMapped()); + EXPECT_EQ(handle.data(), nullptr); + EXPECT_EQ(handle.size(), 0); +} + +TEST(MMapHandleTest, MoveConstructs) { + const std::string payload = "This is some data in the file."; + + TempFileDesc tmp_file; + ASSERT_TRUE(tmp_file.IsOpen()); + write(tmp_file.GetFd(), payload.c_str(), size(payload)); + tmp_file.Close(); + + MMapHandle handle; + ASSERT_TRUE(handle.Map(tmp_file.GetCPath())); + + MMapHandle handle2(std::move(handle)); + + // We are checking that the moved from handle has lost control over the data. + // NOLINTBEGIN(bugprone-use-after-move) + EXPECT_FALSE(handle.IsMapped()); + EXPECT_EQ(handle.data(), nullptr); + EXPECT_EQ(handle.size(), 0); + // NOLINTEND(bugprone-use-after-move) + + EXPECT_TRUE(handle2.IsMapped()); + EXPECT_NE(handle2.data(), nullptr); + EXPECT_THAT(handle2.size(), Ge(size(payload))); + EXPECT_THAT(handle2, ElementsAreArray(payload)); +} + +TEST(WeightCacheBuilderTest, ReserveAppendWriteWorks) { + using std::size; + + const std::string payload = "This is some data in the file."; + const PackIdentifier dummy_id{1, 2, 3}; + + WeightCacheBuilder builder; + + const size_t payload_size = size(payload); + void* buffer = builder.Reserve(payload_size); + std::memcpy(buffer, payload.c_str(), payload_size); + auto loc = builder.Append(dummy_id, buffer, payload_size); + + EXPECT_EQ(loc.size, payload_size); + EXPECT_EQ(builder.BufferData().size(), payload_size); + EXPECT_TRUE(builder.ShouldWrite()); + + TempFileDesc tmp_file; + ASSERT_TRUE(tmp_file.IsOpen()); + tmp_file.Close(); + + ASSERT_TRUE(builder.Write(tmp_file.GetCPath())); + + MMapHandle handle; + ASSERT_TRUE(handle.Map(tmp_file.GetCPath())); + + const cache::schema::PackedWeights* const packed_weights = + cache::schema::GetPackedWeights(handle.data()); + ASSERT_NE(packed_weights, nullptr); + EXPECT_LE(packed_weights->flatbuffer_size(), size(handle) - size(payload)); + ASSERT_NE(packed_weights->buffers(), nullptr); + ASSERT_EQ(packed_weights->buffers()->size(), 1); + ASSERT_NE(packed_weights->buffers()->Get(0), nullptr); + ASSERT_EQ(packed_weights->buffers()->Get(0)->size(), size(payload)); + EXPECT_EQ(packed_weights->buffers()->Get(0)->offset(), 0); + ASSERT_EQ(packed_weights->buffers()->Get(0)->packing_algorithm_id(), + dummy_id.pack_algorithm_id); + ASSERT_EQ(packed_weights->buffers()->Get(0)->weights_id(), + dummy_id.weights_id); + ASSERT_EQ(packed_weights->buffers()->Get(0)->bias_id(), dummy_id.bias_id); + + flatbuffers::Verifier verifier(handle.data(), handle.size()); + EXPECT_TRUE(cache::schema::VerifyPackedWeightsBuffer(verifier)) + << packed_weights->flatbuffer_size() << " " << handle.size() << " " + << packed_weights->buffers()->size() << "\n" + << tmp_file.GetPath(); +} + +TEST(WeightCacheBuilderTest, AppendWithoutReserveWriteWorks) { + using std::size; + + const std::string payload = "This is some data in the file."; + const PackIdentifier 
dummy_id{1, 2, 3};
+
+  WeightCacheBuilder builder;
+
+  const size_t payload_size = size(payload);
+  auto loc = builder.Append(dummy_id, payload.c_str(), payload_size);
+
+  EXPECT_EQ(loc.size, payload_size);
+  EXPECT_EQ(builder.BufferData().size(), payload_size);
+  EXPECT_TRUE(builder.ShouldWrite());
+
+  TempFileDesc tmp_file;
+  ASSERT_TRUE(tmp_file.IsOpen());
+  tmp_file.Close();
+
+  ASSERT_TRUE(builder.Write(tmp_file.GetCPath()));
+
+  MMapHandle handle;
+  ASSERT_TRUE(handle.Map(tmp_file.GetCPath()));
+
+  const cache::schema::PackedWeights* const packed_weights =
+      cache::schema::GetPackedWeights(handle.data());
+  ASSERT_NE(packed_weights, nullptr);
+  EXPECT_LE(packed_weights->flatbuffer_size(), size(handle) - size(payload));
+  ASSERT_NE(packed_weights->buffers(), nullptr);
+  ASSERT_EQ(packed_weights->buffers()->size(), 1);
+  ASSERT_NE(packed_weights->buffers()->Get(0), nullptr);
+  ASSERT_EQ(packed_weights->buffers()->Get(0)->size(), size(payload));
+  EXPECT_EQ(packed_weights->buffers()->Get(0)->offset(), 0);
+  ASSERT_EQ(packed_weights->buffers()->Get(0)->packing_algorithm_id(),
+            dummy_id.pack_algorithm_id);
+  ASSERT_EQ(packed_weights->buffers()->Get(0)->weights_id(),
+            dummy_id.weights_id);
+  ASSERT_EQ(packed_weights->buffers()->Get(0)->bias_id(), dummy_id.bias_id);
+
+  flatbuffers::Verifier verifier(handle.data(), handle.size());
+  EXPECT_TRUE(cache::schema::VerifyPackedWeightsBuffer(verifier))
+      << packed_weights->flatbuffer_size() << " " << handle.size() << " "
+      << packed_weights->buffers()->size() << "\n"
+      << tmp_file.GetPath();
+}
+
+TEST(WeightCacheBuilderTest, NonExistingPathFails) {
+  using std::size;
+
+  const std::string payload = "This is some data in the file.";
+  const PackIdentifier dummy_id{1, 2, 3};
+
+  WeightCacheBuilder builder;
+
+  const size_t payload_size = size(payload);
+  auto loc = builder.Append(dummy_id, payload.c_str(), payload_size);
+
+  EXPECT_EQ(loc.size, payload_size);
+  EXPECT_EQ(builder.BufferData().size(), payload_size);
+  EXPECT_TRUE(builder.ShouldWrite());
+
+  EXPECT_FALSE(builder.Write(""));
+  EXPECT_FALSE(builder.Write("/selktjdsljf"));
+}
+
+struct FakeContext {
+  // Adds a new tensor and its backing buffer to the context.
+  //
+  // The tensor `data` will not be set until `FinalizeTensors` is called.
+  void AddTensor(int buffer_identifier, size_t size) {
+    buffers.emplace_back(size, buffer_identifier);
+    tensors.push_back({});
+    tensors.back().allocation_type = kTfLiteMmapRo;
+    tensor_buffer_identifiers[tensors.size() - 1] = buffer_identifier;
+  }
+
+  // Updates the tensor data mappings.
+  //
+  // This needs to be called every time the context `tensors` list is
+  // reallocated (mainly because of insertions).
+  void FinalizeTensors() {
+    for (size_t i = 0; i < tensors.size(); ++i) {
+      tensors[i].data.data = buffers[i].data();
+      tensors[i].bytes = buffers[i].size();
+    }
+  }
+
+  // Creates a lookup key for the XNNPack weight provider C interface.
+  xnn_weights_cache_look_up_key LookUpKey(const uint32_t algorithm_seed,
+                                          const int weights_index) const {
+    return {.seed = algorithm_seed,
+            .kernel = buffers[weights_index].data(),
+            .bias = nullptr};
+  }
+
+  // Creates a lookup key for the XNNPack weight provider C interface.
+  xnn_weights_cache_look_up_key LookUpKey(const uint32_t algorithm_seed,
+                                          const int weights_index,
+                                          const int bias_index) const {
+    return {.seed = algorithm_seed,
+            .kernel = buffers[weights_index].data(),
+            .bias = buffers[bias_index].data()};
+  }
+
+  // Helps create fake packed data.
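+  //
+  // The fake "packing" below simply XORs the tensor bytes together: any
+  // change to an input buffer changes the packed result, which is all these
+  // tests need in order to detect a mismatch.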
+  void AddTensorToPack(std::vector<char>& pack_buffer, int index) {
+    const std::vector<char>& buffer = buffers[index];
+    pack_buffer.resize(std::max(size(pack_buffer), size(buffer)));
+    for (size_t i = 0; i < size(buffer); ++i) {
+      pack_buffer[i] ^= buffer[i];
+    }
+  }
+
+  // Packs the referenced tensors into one buffer.
+  //
+  // Returns the pack id to retrieve the packed reference data from
+  // `packed_buffers`.
+  template <class... Ids>
+  PackIdentifier PackTensors(xnn_weights_cache_t weight_cache,
+                             const uint32_t algorithm_seed,
+                             const Ids... tensor_indices) {
+    // Create fake packed data and save the result for later lookup tests.
+
+    PackIdentifier pack_id{algorithm_seed,
+                           tensor_buffer_identifiers[tensor_indices]...};
+    PackedBuffer& packed =
+        packed_buffers.emplace(pack_id, PackedBuffer{})->second;
+    (AddTensorToPack(packed.buffer, tensor_indices), ...);
+
+    // Add the packed buffer to the XNNPack cache. Normally you would pack in
+    // place, in the reserved space.
+    xnn_weights_cache_look_up_key look_up_key =
+        LookUpKey(algorithm_seed, tensor_indices...);
+    packed.offset = weight_cache->look_up_or_insert(
+        weight_cache->context, &look_up_key, packed.buffer.data(),
+        packed.buffer.size());
+    return pack_id;
+  }
+
+  struct PackedBuffer {
+    size_t offset;
+    std::vector<char> buffer;
+  };
+
+  std::vector<TfLiteTensor> tensors;
+  std::vector<std::vector<char>> buffers;
+  std::unordered_multimap<PackIdentifier, PackedBuffer, PackIdentifier::Hash>
+      packed_buffers;
+  std::unordered_map<size_t, size_t> tensor_buffer_identifiers;
+};
+
+struct BuildMMapWeightCacheProviderTest : testing::Test {
+  enum { kAlgoSeed1, kAlgoSeed2, kAlgoSeed3 };
+  enum { kBufferId1, kBufferId2, kBufferId3, kBufferId4 };
+
+  void SetUp() override {
+    AddTensors();
+    EndSetup();
+  }
+
+  void AddTensors() {
+    ctx.AddTensor(/*buffer_identifier=*/kBufferId1, /*size=*/12);
+    ctx.AddTensor(/*buffer_identifier=*/kBufferId2, /*size=*/43);
+    ctx.AddTensor(/*buffer_identifier=*/kBufferId3, /*size=*/64);
+    ctx.AddTensor(/*buffer_identifier=*/kBufferId4, /*size=*/8);
+  }
+
+  void EndSetup() {
+    ctx.FinalizeTensors();
+    cache_provider.MapTensorIdentifiers(ctx.tensors.data(), ctx.tensors.size(),
+                                        ctx.tensor_buffer_identifiers);
+  }
+
+  FakeContext ctx;
+  MMapWeightCacheProvider cache_provider;
+};
+
+TEST_F(BuildMMapWeightCacheProviderTest, LookUpFailsIfKeyDoesntMatch) {
+  xnn_weights_cache_look_up_key look_up_key{};
+  EXPECT_EQ(cache_provider.LookUp(&look_up_key), SIZE_MAX);
+}
+
+TEST_F(BuildMMapWeightCacheProviderTest, LookUpSucceeds) {
+  enum { kWeightIndex, kBiasIndex };
+  const auto pack_id = ctx.PackTensors(&cache_provider.GetCacheProvider(),
+                                       kAlgoSeed1, kWeightIndex, kBiasIndex);
+  const xnn_weights_cache_look_up_key look_up_key =
+      ctx.LookUpKey(kAlgoSeed1, kWeightIndex, kBiasIndex);
+
+  EXPECT_EQ(cache_provider.LookUp(&look_up_key),
+            ctx.packed_buffers.find(pack_id)->second.offset);
+}
+
+TEST_F(BuildMMapWeightCacheProviderTest,
+       DifferentAlgoSeedsSameTensorsDontConflict) {
+  enum { kWeightIndex, kBiasIndex };
+  const auto pack_id_1 = ctx.PackTensors(&cache_provider.GetCacheProvider(),
+                                         kAlgoSeed1, kWeightIndex, kBiasIndex);
+  const auto pack_id_2 = ctx.PackTensors(&cache_provider.GetCacheProvider(),
+                                         kAlgoSeed2, kWeightIndex, kBiasIndex);
+
+  const xnn_weights_cache_look_up_key look_up_key_1 =
+      ctx.LookUpKey(kAlgoSeed1, kWeightIndex, kBiasIndex);
+  const xnn_weights_cache_look_up_key look_up_key_2 =
+      ctx.LookUpKey(kAlgoSeed2, kWeightIndex, kBiasIndex);
+
+  EXPECT_EQ(cache_provider.LookUp(&look_up_key_1),
+            ctx.packed_buffers.find(pack_id_1)->second.offset);
+  EXPECT_EQ(cache_provider.LookUp(&look_up_key_2),
ctx.packed_buffers.find(pack_id_2)->second.offset); + EXPECT_NE(cache_provider.LookUp(&look_up_key_1), + cache_provider.LookUp(&look_up_key_2)); +} + +TEST_F(BuildMMapWeightCacheProviderTest, + SameAlgoSeedDifferentTensorsDontConflict) { + enum { kWeightIndex1, kWeightIndex2, kBiasIndex1, kBiasIndex2 }; + const auto pack_id_1 = + ctx.PackTensors(&cache_provider.GetCacheProvider(), kAlgoSeed1, + kWeightIndex1, kBiasIndex1); + const auto pack_id_2 = + ctx.PackTensors(&cache_provider.GetCacheProvider(), kAlgoSeed1, + kWeightIndex2, kBiasIndex1); + const auto pack_id_3 = + ctx.PackTensors(&cache_provider.GetCacheProvider(), kAlgoSeed1, + kWeightIndex1, kBiasIndex2); + const auto pack_id_4 = + ctx.PackTensors(&cache_provider.GetCacheProvider(), kAlgoSeed1, + kWeightIndex2, kBiasIndex2); + + const xnn_weights_cache_look_up_key look_up_key_1 = + ctx.LookUpKey(kAlgoSeed1, kWeightIndex1, kBiasIndex1); + const xnn_weights_cache_look_up_key look_up_key_2 = + ctx.LookUpKey(kAlgoSeed1, kWeightIndex2, kBiasIndex1); + const xnn_weights_cache_look_up_key look_up_key_3 = + ctx.LookUpKey(kAlgoSeed1, kWeightIndex1, kBiasIndex2); + const xnn_weights_cache_look_up_key look_up_key_4 = + ctx.LookUpKey(kAlgoSeed1, kWeightIndex2, kBiasIndex2); + + EXPECT_EQ(cache_provider.LookUp(&look_up_key_1), + ctx.packed_buffers.find(pack_id_1)->second.offset); + EXPECT_EQ(cache_provider.LookUp(&look_up_key_2), + ctx.packed_buffers.find(pack_id_2)->second.offset); + EXPECT_EQ(cache_provider.LookUp(&look_up_key_3), + ctx.packed_buffers.find(pack_id_3)->second.offset); + EXPECT_EQ(cache_provider.LookUp(&look_up_key_4), + ctx.packed_buffers.find(pack_id_4)->second.offset); + EXPECT_NE(cache_provider.LookUp(&look_up_key_1), + cache_provider.LookUp(&look_up_key_2)); + EXPECT_NE(cache_provider.LookUp(&look_up_key_1), + cache_provider.LookUp(&look_up_key_3)); + EXPECT_NE(cache_provider.LookUp(&look_up_key_1), + cache_provider.LookUp(&look_up_key_4)) + << pack_id_1 << " " << pack_id_4; + EXPECT_NE(cache_provider.LookUp(&look_up_key_2), + cache_provider.LookUp(&look_up_key_3)); + EXPECT_NE(cache_provider.LookUp(&look_up_key_2), + cache_provider.LookUp(&look_up_key_4)); + EXPECT_NE(cache_provider.LookUp(&look_up_key_3), + cache_provider.LookUp(&look_up_key_4)); +} + +TEST_F(BuildMMapWeightCacheProviderTest, FinalizeWorks) { + enum { kWeightIndex1, kBiasIndex, kWeightIndex2 }; + TempFileDesc tmp_file; + + ctx.PackTensors(&cache_provider.GetCacheProvider(), kAlgoSeed1, kWeightIndex1, + kBiasIndex); + ctx.PackTensors(&cache_provider.GetCacheProvider(), kAlgoSeed2, + kWeightIndex2); + + EXPECT_FALSE(cache_provider.Finalize()); + cache_provider.SetFilePath(tmp_file.GetCPath()); + + EXPECT_TRUE(cache_provider.IsActive()); + EXPECT_TRUE(cache_provider.IsBuilding()); + ASSERT_TRUE(cache_provider.Finalize()); + + ASSERT_TRUE(cache_provider.IsFinalized()); +} + +struct LoadMMapWeightCacheProviderTest : BuildMMapWeightCacheProviderTest { + enum { kWeightIndex1, kBiasIndex, kWeightIndex2 }; + + void SetUp() override { + BuildMMapWeightCacheProviderTest::SetUp(); + cache_provider.SetFilePath(tmp_file.GetCPath()); + + pack_id_1 = ctx.PackTensors(&cache_provider.GetCacheProvider(), kAlgoSeed1, + kWeightIndex1, kBiasIndex); + pack_id_2 = ctx.PackTensors(&cache_provider.GetCacheProvider(), kAlgoSeed2, + kWeightIndex2); + + ASSERT_TRUE(cache_provider.Finalize()); + ASSERT_TRUE(cache_provider.IsFinalized()); + } + + xnn_weights_cache_look_up_key LookUpKey1() const { + return ctx.LookUpKey(kAlgoSeed1, kWeightIndex1, kBiasIndex); + } + + 
xnn_weights_cache_look_up_key LookUpKey2() const {
+    return ctx.LookUpKey(kAlgoSeed2, kWeightIndex2);
+  }
+
+  TempFileDesc tmp_file;
+  PackIdentifier pack_id_1;
+  PackIdentifier pack_id_2;
+};
+
+TEST_F(LoadMMapWeightCacheProviderTest, LookUpFailsIfKeyDoesntMatch) {
+  xnn_weights_cache_look_up_key look_up_key{};
+  EXPECT_EQ(cache_provider.LookUp(&look_up_key), SIZE_MAX);
+}
+
+template <class T>
+class LightSpan {
+ public:
+  using value_type = T;
+
+  LightSpan(const void* data, const size_t size)
+      : ptr_(reinterpret_cast<T*>(data)), size_(size) {}
+
+  const T* begin() const { return ptr_; }
+  const T* end() const { return ptr_ + size_; }
+
+ private:
+  T* ptr_;
+  size_t size_;
+};
+
+TEST_F(LoadMMapWeightCacheProviderTest, LookUpSucceeds) {
+  const auto& reference_1 = ctx.packed_buffers.find(pack_id_1)->second;
+  const auto& reference_2 = ctx.packed_buffers.find(pack_id_2)->second;
+
+  const xnn_weights_cache_look_up_key look_up_key_1 = LookUpKey1();
+  const xnn_weights_cache_look_up_key look_up_key_2 = LookUpKey2();
+
+  const uint64_t offset_1 = cache_provider.LookUp(&look_up_key_1);
+  const uint64_t offset_2 = cache_provider.LookUp(&look_up_key_2);
+
+  ASSERT_EQ(offset_1, reference_1.offset);
+  ASSERT_EQ(offset_2, reference_2.offset);
+
+  const void* const addr_1 = cache_provider.OffsetToAddr(offset_1);
+  const void* const addr_2 = cache_provider.OffsetToAddr(offset_2);
+
+  ASSERT_NE(addr_1, nullptr);
+  ASSERT_NE(addr_2, nullptr);
+
+  EXPECT_THAT(LightSpan<const char>(addr_1, reference_1.buffer.size()),
+              ElementsAreArray(reference_1.buffer));
+  EXPECT_THAT(LightSpan<const char>(addr_2, reference_2.buffer.size()),
+              ElementsAreArray(reference_2.buffer));
+}
+
+TEST(MMapWeightCacheProviderTest, XnnpackCApiJourney) {
+  using std::size;
+  TempFileDesc temp_fd(TempFileDesc::kAutoClose);
+  const int32_t fake_packing_algo_seed = 0xBA0BAB;
+  const char packed_data_ref_1[] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+  const char packed_data_ref_2[] = {26, 32, 43, 59, 34, 65, 80, 101};
+  auto bytes = [](const auto& array) { return size(array) * sizeof(array[0]); };
+
+  constexpr int kBufferCount = 10;
+  // We are going to feed dummy packed data. We only need a valid pointer
+  // address to map to a buffer identifier.
+  char fake_buffer_pointer[kBufferCount] = {0};
+
+  {  // Build and reload scenario.
+    TfLiteTensor tensors[kBufferCount];
+    std::unordered_map<size_t, size_t> tensor_buffer_identifiers;
+    for (int i = 0; i < kBufferCount; ++i) {
+      tensors[0].data.data = (void*)(fake_buffer_pointer + i);
+      tensor_buffer_identifiers[i] = i + 1;
+    }
+
+    MMapWeightCacheProvider cache_provider;
+    cache_provider.SetFilePath(temp_fd.GetCPath());
+
+    xnn_weights_cache_t cache = &cache_provider.GetCacheProvider();
+    cache_provider.MapTensorIdentifiers(tensors, size(tensors),
+                                        tensor_buffer_identifiers);
+
+    const xnn_weights_cache_look_up_key look_up_key_1{
+        .seed = fake_packing_algo_seed,
+        .kernel = tensors[0].data.data,
+        .bias = tensors[1].data.data};
+
+    // Lookup non-packed tensor.
+    ASSERT_EQ(cache->look_up(cache, &look_up_key_1), SIZE_MAX);
+    // Reserve space, write data and add packed data.
+    void* const reserved_ptr =
+        cache->reserve_space(cache, bytes(packed_data_ref_1));
+    ASSERT_NE(reserved_ptr, nullptr);
+    std::memcpy(reserved_ptr, packed_data_ref_1, bytes(packed_data_ref_1));
+    const size_t build_offset_1 = cache->look_up_or_insert(
+        cache, &look_up_key_1, reserved_ptr, bytes(packed_data_ref_1));
+
+    // Check that a second insertion with the same key returns the same offset.
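+    // (`look_up_or_insert` is expected to be idempotent for an identical
+    // key and span.)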
+    const size_t build_offset_redundant = cache->look_up_or_insert(
+        cache, &look_up_key_1, reserved_ptr, bytes(packed_data_ref_1));
+    EXPECT_EQ(build_offset_1, build_offset_redundant);
+
+    // Lookup newly packed tensor.
+    ASSERT_EQ(cache->look_up(cache, &look_up_key_1), build_offset_1);
+
+    // Add a tensor without reserving before.
+    const xnn_weights_cache_look_up_key look_up_key_2{
+        .seed = fake_packing_algo_seed,
+        .kernel = tensors[2].data.data,
+        .bias = tensors[3].data.data};
+    const size_t build_offset_2 = cache->look_up_or_insert(
+        cache, &look_up_key_2, (void*)packed_data_ref_2,
+        bytes(packed_data_ref_2));
+
+    // Save the cache to disk and reload.
+    ASSERT_TRUE(cache_provider.Finalize());
+
+    ASSERT_TRUE(cache->is_finalized(cache));
+
+    const size_t reload_offset_1 = cache->look_up(cache, &look_up_key_1);
+    ASSERT_EQ(reload_offset_1, build_offset_1);
+
+    const void* const loaded_packed_data_1 =
+        cache->offset_to_addr(cache, reload_offset_1);
+    ASSERT_NE(loaded_packed_data_1, nullptr);
+    EXPECT_THAT(
+        LightSpan<const char>(loaded_packed_data_1, size(packed_data_ref_1)),
+        ElementsAreArray(packed_data_ref_1));
+
+    const size_t reload_offset_2 = cache->look_up(cache, &look_up_key_2);
+    ASSERT_EQ(reload_offset_2, build_offset_2);
+
+    const void* const loaded_packed_data_2 =
+        cache->offset_to_addr(cache, reload_offset_2);
+    ASSERT_NE(loaded_packed_data_2, nullptr);
+    EXPECT_THAT(
+        LightSpan<const char>(loaded_packed_data_2, size(packed_data_ref_2)),
+        ElementsAreArray(packed_data_ref_2));
+  }
+
+  {  // Load existing cache scenario.
+    TfLiteTensor tensors[kBufferCount];
+    std::unordered_map<size_t, size_t> tensor_buffer_identifiers;
+    for (int i = 0; i < kBufferCount; ++i) {
+      tensors[0].data.data = (void*)(fake_buffer_pointer + i);
+      tensor_buffer_identifiers[i] = i + 1;
+    }
+
+    MMapWeightCacheProvider cache_provider;
+    ASSERT_TRUE(cache_provider.Load(temp_fd.GetCPath()));
+
+    xnn_weights_cache_t cache = &cache_provider.GetCacheProvider();
+    cache_provider.MapTensorIdentifiers(tensors, size(tensors),
+                                        tensor_buffer_identifiers);
+
+    const xnn_weights_cache_look_up_key look_up_key_1{
+        .seed = fake_packing_algo_seed,
+        .kernel = tensors[0].data.data,
+        .bias = tensors[1].data.data};
+
+    const xnn_weights_cache_look_up_key look_up_key_2{
+        .seed = fake_packing_algo_seed,
+        .kernel = tensors[2].data.data,
+        .bias = tensors[3].data.data};
+
+    ASSERT_TRUE(cache->is_finalized(cache));
+
+    const size_t offset_1 = cache->look_up(cache, &look_up_key_1);
+    const void* const loaded_packed_data_1 =
+        cache->offset_to_addr(cache, offset_1);
+    ASSERT_NE(loaded_packed_data_1, nullptr);
+    EXPECT_THAT(
+        LightSpan<const char>(loaded_packed_data_1, size(packed_data_ref_1)),
+        ElementsAreArray(packed_data_ref_1));
+
+    const size_t offset_2 = cache->look_up(cache, &look_up_key_2);
+    const void* const loaded_packed_data_2 =
+        cache->offset_to_addr(cache, offset_2);
+    ASSERT_NE(loaded_packed_data_2, nullptr);
+    EXPECT_THAT(
+        LightSpan<const char>(loaded_packed_data_2, size(packed_data_ref_2)),
+        ElementsAreArray(packed_data_ref_2));
+  }
+}
+
+}  // namespace
+}  // namespace tflite::xnnpack
diff --git a/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc b/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc
index 76cc6dba209ab9..1503fea039580f 100644
--- a/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc
+++ b/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc
@@ -38,7 +38,9 @@ limitations under the License.
#include "tensorflow/lite/core/api/profiler.h" #include "tensorflow/lite/core/c/builtin_op_data.h" #include "tensorflow/lite/core/c/common.h" +#include "tensorflow/lite/core/subgraph.h" #include "tensorflow/lite/delegates/xnnpack/quantization_util.h" +#include "tensorflow/lite/delegates/xnnpack/weight_cache.h" #include "tensorflow/lite/kernels/cpu_backend_context.h" #include "tensorflow/lite/kernels/internal/compatibility.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" @@ -487,6 +489,8 @@ class VariableHolder { std::map global_id_to_dims_and_type_; }; +class Subgraph; + class Delegate { friend class Subgraph; @@ -525,6 +529,29 @@ class Delegate { options != nullptr ? *options : TfLiteXNNPackDelegateOptionsDefault(); delegate_.flags = GetXNNPackDelegateFlags(); workspace_.reset(workspace); + + // If no weight cache is provided, add one when requested. + if (!options_.weights_cache) { + if (options_.experimental_weight_cache_file_path) { + if (weight_cache_provider_.Load( + options_.experimental_weight_cache_file_path)) { + TFLITE_LOG_PROD(tflite::TFLITE_LOG_INFO, + "XNNPack weight cache loaded from '%s'.", + options_.experimental_weight_cache_file_path); + } else { + TFLITE_LOG_PROD( + tflite::TFLITE_LOG_INFO, + "XNNPack weight cache not found at '%s', building it.", + options_.experimental_weight_cache_file_path); + } + options_.weights_cache = + reinterpret_cast( + weight_cache_provider_.GetCacheProvider().context); + } else { + TFLITE_LOG_PROD(tflite::TFLITE_LOG_INFO, + "XNNPack weight cache not enabled."); + } + } } TfLiteIntArray* PrepareOpsToDelegate(TfLiteContext* context); @@ -711,6 +738,10 @@ class Delegate { TfLiteXNNPackDelegateOptions options_{}; VariableHolder variable_holder_; std::mutex workspace_mutex_; + + // If no weight cache is provided and a cache is set in the delegate options, + // this will be used as a weight cache. + MMapWeightCacheProvider weight_cache_provider_; }; class Subgraph { @@ -781,6 +812,13 @@ class Subgraph { static Subgraph* Create(TfLiteContext* context, const TfLiteDelegateParams* params, Delegate& delegate) { + // Map tensors identifiers before packing anything. + if (delegate.weight_cache_provider_.IsActive()) { + delegate.weight_cache_provider_.MapTensorIdentifiers( + context->tensors, context->tensors_size, + reinterpret_cast(context->impl_) + ->GetTensorBufferIdentifiers()); + } // Convert subgraph inputs and outputs to hash sets for faster lookup. const std::unordered_set inputs( ¶ms->input_tensors->data[0], @@ -1121,6 +1159,18 @@ class Subgraph { TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node, bool enable_subgraph_reshaping, Delegate* delegate) { std::lock_guard lock(delegate->workspace_mutex_); + + // The weights cache needs to be finalized only once. Prepare will be called + // for each partition after all the partitions have been created (therefore + // all the weights are known and have been packed). 
+    if (delegate->weight_cache_provider_.IsActive()) {
+      if (!delegate->weight_cache_provider_.Finalize()) {
+        TF_LITE_KERNEL_LOG(context,
+                           "XNNPack delegate failed to finalize cache.");
+        return kTfLiteError;
+      }
+    }
+
     if (enable_subgraph_reshaping) {
       xnn_status status = xnn_status_invalid_state;
       for (int i = 0; i < inputs_.size(); ++i) {
@@ -1170,10 +1220,8 @@ class Subgraph {
         return kTfLiteError;
       }
     }
-      return kTfLiteOk;
-    } else {
-      return kTfLiteOk;
     }
+    return kTfLiteOk;
   }
 
   TfLiteStatus Invoke(TfLiteContext* context, bool enable_subgraph_reshaping,
diff --git a/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h b/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h
index aa11998dc0fc49..1d40c04db940ac 100644
--- a/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h
+++ b/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h
@@ -70,6 +70,10 @@ typedef struct {
   bool handle_variable_ops;
   // Enable adaptive optimization for AVX CPUs.
   bool experimental_adaptive_avx_optimization;
+  // Path to the weight cache to load if `weights_cache` is undefined.
+  //
+  // WARNING: this is an experimental flag.
+  const char* experimental_weight_cache_file_path;
 } TfLiteXNNPackDelegateOptions;
 
 // Returns a structure with the default XNNPack delegate options.
@@ -111,11 +115,13 @@ TFL_CAPI_EXPORT void TfLiteXNNPackDelegateDelete(TfLiteDelegate* delegate);
 // reduce memory bandwidth.
 TFL_CAPI_EXPORT struct TfLiteXNNPackDelegateWeightsCache*
 TfLiteXNNPackDelegateWeightsCacheCreate();
+
 // Creates a new weights cache with a specified initial size that can be shared
 // with multiple delegate instances. The weights cache can hold up to size bytes
 // without growing.
 TFL_CAPI_EXPORT struct TfLiteXNNPackDelegateWeightsCache*
 TfLiteXNNPackDelegateWeightsCacheCreateWithSize(size_t size);
+
 // Soft-finalize a weights cache. Extra space will be left in the weights cache
 // to allow for cache "insertion" only if it is a cache hit. This has memory
 // overhead compared to TfLiteXNNPackDelegateWeightsCacheFinalizeHard. Use this
@@ -124,6 +130,7 @@ TfLiteXNNPackDelegateWeightsCacheCreateWithSize(size_t size);
 // Returns true on success, false on error.
 TFL_CAPI_EXPORT bool TfLiteXNNPackDelegateWeightsCacheFinalizeSoft(
     struct TfLiteXNNPackDelegateWeightsCache* cache);
+
 // Hard-finalize a weights cache, cache is effectively frozen and no more cache
 // operations are allowed. Memory is resized to smallest possible. Use this if
 // the number of interpreter instances using XNNPACK delegate can be fixed and
@@ -132,6 +139,7 @@ TFL_CAPI_EXPORT bool TfLiteXNNPackDelegateWeightsCacheFinalizeSoft(
 // Returns true on success, false on error.
 TFL_CAPI_EXPORT bool TfLiteXNNPackDelegateWeightsCacheFinalizeHard(
     struct TfLiteXNNPackDelegateWeightsCache* cache);
+
 // Destroys a weights cache created with
 // `TfLiteXNNPackDelegateWeightsCacheCreate` call.
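+// (or created with `TfLiteXNNPackDelegateWeightsCacheCreateWithSize`).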
 TFL_CAPI_EXPORT void TfLiteXNNPackDelegateWeightsCacheDelete(
diff --git a/tensorflow/lite/tflite_with_xnnpack.cc b/tensorflow/lite/tflite_with_xnnpack.cc
index 22e8617ec74e21..d443d404c21f05 100644
--- a/tensorflow/lite/tflite_with_xnnpack.cc
+++ b/tensorflow/lite/tflite_with_xnnpack.cc
@@ -23,6 +23,10 @@ namespace tflite {
 std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>
 AcquireXNNPACKDelegate() {
   auto opts = TfLiteXNNPackDelegateOptionsDefault();
+#ifdef TFLITE_XNNPACK_DELEGATE_EXPERIMENTAL_WEIGHT_CACHE_FILE_PATH
+  opts.experimental_weight_cache_file_path =
+      TFLITE_XNNPACK_DELEGATE_EXPERIMENTAL_WEIGHT_CACHE_FILE_PATH;
+#endif
   return std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>(
       TfLiteXNNPackDelegateCreate(&opts), TfLiteXNNPackDelegateDelete);
 }
diff --git a/tensorflow/opensource_only.files b/tensorflow/opensource_only.files
index b2645a331739e3..b83a48134d02e1 100644
--- a/tensorflow/opensource_only.files
+++ b/tensorflow/opensource_only.files
@@ -101,6 +101,7 @@ tf_staging/tensorflow/lite/delegates/utils/experimental/stable_delegate/BUILD:
 tf_staging/tensorflow/lite/delegates/utils/experimental/stable_delegate/delegate_loader.cc:
 tf_staging/tensorflow/lite/delegates/utils/experimental/stable_delegate/delegate_loader.h:
 tf_staging/tensorflow/lite/delegates/utils/experimental/stable_delegate/delegate_loader_test.cc:
+tf_staging/tensorflow/lite/delegates/xnnpack/weight_cache_schema_generated.h:
 tf_staging/tensorflow/lite/experimental/acceleration/configuration/configuration_generated.h:
 tf_staging/tensorflow/lite/experimental/acceleration/mini_benchmark/c/c_api.h:
 tf_staging/tensorflow/lite/experimental/acceleration/mini_benchmark/libjpeg.h:

From fd2a7a57c5276612faff2fa0bf4a908e48c73811 Mon Sep 17 00:00:00 2001
From: Johannes Reifferscheid
Date: Mon, 13 May 2024 04:03:57 -0700
Subject: [PATCH 013/478] Reduce input sizes in ReductionTest.

The interpreter is very slow for some of these (up to ~45 seconds in
unoptimized builds). With this change, runtime goes down to at most ~1 second
per test case. At this point, sharding is no longer helpful (since the fixed
setup cost dominates), so we can disable it.
PiperOrigin-RevId: 633158930 --- third_party/xla/xla/service/gpu/fusions/BUILD | 1 - .../gpu/fusions/reduction_mlir_test.cc | 32 +++++++++---------- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/third_party/xla/xla/service/gpu/fusions/BUILD b/third_party/xla/xla/service/gpu/fusions/BUILD index cb89eb885d4e59..02752d4bfffe04 100644 --- a/third_party/xla/xla/service/gpu/fusions/BUILD +++ b/third_party/xla/xla/service/gpu/fusions/BUILD @@ -859,7 +859,6 @@ cc_library( xla_cc_test( name = "reduction_mlir_test", srcs = ["reduction_mlir_test.cc"], - shard_count = 11, use_gpu = True, deps = [ ":mlir_emitter_test_base", diff --git a/third_party/xla/xla/service/gpu/fusions/reduction_mlir_test.cc b/third_party/xla/xla/service/gpu/fusions/reduction_mlir_test.cc index 45b46267a2cfa7..a34843735301ab 100644 --- a/third_party/xla/xla/service/gpu/fusions/reduction_mlir_test.cc +++ b/third_party/xla/xla/service/gpu/fusions/reduction_mlir_test.cc @@ -41,26 +41,26 @@ TEST_F(ReductionTest, VariadicRowReduce) { ROOT t = (f32[], f32[]) tuple(add.0, add.1) } fused_computation { - param_0 = f32[5,200,2048] parameter(0) - param_1 = f32[5,200,2048] parameter(1) + param_0 = f32[2, 3, 2048] parameter(0) + param_1 = f32[2, 3, 2048] parameter(1) param_2 = f32[] parameter(2) - ROOT d.1 = (f32[5,200], f32[5,200]) + ROOT d.1 = (f32[2, 3], f32[2, 3]) reduce(param_0, param_1, param_2, param_2), dimensions={2}, to_apply=Add } ENTRY main { - a = f32[5, 200, 2048] parameter(0) - b = f32[5, 200, 2048] parameter(1) + a = f32[2, 3, 2048] parameter(0) + b = f32[2, 3, 2048] parameter(1) c = f32[] constant(0) - ROOT fusion = (f32[5,200], f32[5,200]) fusion(a, b, c), + ROOT fusion = (f32[2, 3], f32[2, 3]) fusion(a, b, c), kind=kInput, calls=fused_computation })"; TF_ASSERT_OK(EmitAndCheckIR(kHloString, R"( // CHECK: @fused_computation -// CHECK-SAME: %[[ARG0:.*]]: tensor<5x200x2048xf32> {xla.slice_index = 0 -// CHECK-SAME: %[[ARG1:.*]]: tensor<5x200x2048xf32> {xla.slice_index = 1 +// CHECK-SAME: %[[ARG0:.*]]: tensor<2x3x2048xf32> {xla.slice_index = 0 +// CHECK-SAME: %[[ARG1:.*]]: tensor<2x3x2048xf32> {xla.slice_index = 1 // CHECK-SAME: %[[INIT_TENSOR:.*]]: tensor {xla.slice_index = 2 -// CHECK-SAME: %[[OUT0:.*]]: tensor<5x200xf32> {xla.slice_index = 3 -// CHECK-SAME: %[[OUT1:.*]]: tensor<5x200xf32> {xla.slice_index = 4 +// CHECK-SAME: %[[OUT0:.*]]: tensor<2x3xf32> {xla.slice_index = 3 +// CHECK-SAME: %[[OUT1:.*]]: tensor<2x3xf32> {xla.slice_index = 4 // CHECK: %[[INIT:.*]] = xla_gpu.pure_call @fused_computation_param_2 // CHECK: %[[PER_THREAD:.*]]:2 = scf.for // CHECK-SAME: iter_args(%[[A:.*]] = %[[INIT]], %[[B:.*]] = %[[INIT]]) @@ -124,7 +124,7 @@ TEST_F(ReductionTest, RowReduceMOFEpilogue) { ROOT mul = f32[] multiply(lhs, rhs) } fused_computation { - param_0 = f32[8,2048] parameter(0) + param_0 = f32[8,1024] parameter(0) param_1 = f32[] parameter(1) reduce1 = f32[8] reduce(param_0, param_1), dimensions={1}, to_apply=Add reduce2 = f32[8] reduce(param_0, param_1), dimensions={1}, to_apply=Mul @@ -134,7 +134,7 @@ TEST_F(ReductionTest, RowReduceMOFEpilogue) { ROOT tuple = (f32[8], f32[8], f32[8]) tuple(log, neg, abs) } ENTRY main { - a = f32[8,2048] parameter(0) + a = f32[8,1024] parameter(0) c = f32[] constant(0) ROOT fusion = (f32[8], f32[8], f32[8]) fusion(a, c), kind=kInput, calls=fused_computation @@ -194,14 +194,14 @@ TEST_F(ReductionTest, ColumnReduction) { ROOT add = f32[] add(lhs, rhs) } fused_computation { - param_0 = f32[123,2051,321] parameter(0) + param_0 = f32[13,1051,321] parameter(0) param_1 = f32[] 
parameter(1) - ROOT reduce = f32[123,321] reduce(param_0, param_1), dimensions={1}, to_apply=Add + ROOT reduce = f32[13,321] reduce(param_0, param_1), dimensions={1}, to_apply=Add } ENTRY main { - a = f32[123,2051,321] parameter(0) + a = f32[13,1051,321] parameter(0) c = f32[] constant(0) - ROOT fusion = f32[123,321] fusion(a, c), kind=kInput, calls=fused_computation + ROOT fusion = f32[13,321] fusion(a, c), kind=kInput, calls=fused_computation })"; TF_ASSERT_OK(EmitAndCheckIR(kHloString, R"( // CHECK: xla_gpu.pure_call @Add_add From c70ea272d66d38af38d4ae424ed226bcadc05abd Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Mon, 13 May 2024 04:11:08 -0700 Subject: [PATCH 014/478] Disable cache invalidation logic for mlir emitters. We have logic to estimate the impact of certain ops on IR size. This problem is fixed with the MLIR emitters, so we don't need it for them. PiperOrigin-RevId: 633160564 --- .../service/gpu/model/gpu_hlo_cost_analysis.cc | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/third_party/xla/xla/service/gpu/model/gpu_hlo_cost_analysis.cc b/third_party/xla/xla/service/gpu/model/gpu_hlo_cost_analysis.cc index c1ec02bc5267cf..4bbe929730c661 100644 --- a/third_party/xla/xla/service/gpu/model/gpu_hlo_cost_analysis.cc +++ b/third_party/xla/xla/service/gpu/model/gpu_hlo_cost_analysis.cc @@ -218,12 +218,19 @@ bool GpuHloCostAnalysis::ProducerConsumerMergedTooLarge( IrBasicBlockSplitCount(consumer); VLOG(5) << "Basic block split counts: " << IrBasicBlockSplitCount(producer) << ", " << IrBasicBlockSplitCount(consumer) << " -> " << n_splits; - if (n_splits > kMaxBasicBlockSplitsPerFusion) { - return true; - } int64_t merged_ir_size = - (IrSize(producer) * producer_replication + IrSize(consumer)) * - (1 << n_splits); + (IrSize(producer) * producer_replication + IrSize(consumer)); + // The MLIR emitters don't have the problem with cache invalidation, so we + // don't need to evaluate basic block split counts. + if (!producer.GetModule() + ->config() + .debug_options() + .xla_gpu_enable_mlir_emitters()) { + if (n_splits > kMaxBasicBlockSplitsPerFusion) { + return true; + } + merged_ir_size *= (1 << n_splits); + } VLOG(5) << "IR sizes: " << IrSize(producer) << ", " << IrSize(consumer) << " -> " << merged_ir_size; return merged_ir_size > kMaxIRSize; From 399d09503d7d0a7584b994fe544eb1fdc76faa37 Mon Sep 17 00:00:00 2001 From: Quentin Khan Date: Mon, 13 May 2024 04:51:36 -0700 Subject: [PATCH 015/478] Fix weight cache test typo in XNNPack delegate. 
PiperOrigin-RevId: 633168188 --- tensorflow/lite/delegates/xnnpack/weight_cache_test.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/lite/delegates/xnnpack/weight_cache_test.cc b/tensorflow/lite/delegates/xnnpack/weight_cache_test.cc index a270c4f712918f..4cf49e2144cbe4 100644 --- a/tensorflow/lite/delegates/xnnpack/weight_cache_test.cc +++ b/tensorflow/lite/delegates/xnnpack/weight_cache_test.cc @@ -607,8 +607,8 @@ TEST(MMapWeightCacheProviderTest, XnnpackCApiJourney) { TfLiteTensor tensors[kBufferCount]; std::unordered_map tensor_buffer_identifiers; for (int i = 0; i < kBufferCount; ++i) { - tensors[0].data.data = (void*)(fake_buffer_pointer + i); - tensor_buffer_identifiers[i] = i + 1; + tensors[i].data.data = (void*)(fake_buffer_pointer + i); + tensor_buffer_identifiers[i] = i; } MMapWeightCacheProvider cache_provider; @@ -680,8 +680,8 @@ TEST(MMapWeightCacheProviderTest, XnnpackCApiJourney) { TfLiteTensor tensors[kBufferCount]; std::unordered_map tensor_buffer_identifiers; for (int i = 0; i < kBufferCount; ++i) { - tensors[0].data.data = (void*)(fake_buffer_pointer + i); - tensor_buffer_identifiers[i] = i + 1; + tensors[i].data.data = (void*)(fake_buffer_pointer + i); + tensor_buffer_identifiers[i] = i; } MMapWeightCacheProvider cache_provider; From 6487006a061ed706e713c5e025c46cf19b9d42c7 Mon Sep 17 00:00:00 2001 From: Johannes Reifferscheid Date: Mon, 13 May 2024 05:19:07 -0700 Subject: [PATCH 016/478] Enable VLOGging of pass pipelines in mlir_fusion_emitter. PiperOrigin-RevId: 633174085 --- .../xla/service/gpu/fusions/mlir/mlir_fusion_emitter.cc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/third_party/xla/xla/service/gpu/fusions/mlir/mlir_fusion_emitter.cc b/third_party/xla/xla/service/gpu/fusions/mlir/mlir_fusion_emitter.cc index 286b3e211df558..41108b774751b9 100644 --- a/third_party/xla/xla/service/gpu/fusions/mlir/mlir_fusion_emitter.cc +++ b/third_party/xla/xla/service/gpu/fusions/mlir/mlir_fusion_emitter.cc @@ -287,6 +287,9 @@ MlirFusionEmitterBase::CreateLLVMModule( buffer_assignment)); mlir::PassManager pm(&mlir_context); + if (VLOG_IS_ON(5)) { + pm.enableIRPrinting(); + } pm.addPass(mlir::createCanonicalizerPass()); pm.addPass(mlir::createInlinerPass()); pm.addPass(mlir::createCanonicalizerPass()); @@ -430,6 +433,10 @@ MlirFusionEmitterBase::CreateMLIRModule( // Run a minimal simplification pipeline. mlir::PassManager pm(&context); + if (VLOG_IS_ON(5)) { + context.disableMultithreading(); + pm.enableIRPrinting(); + } pm.addPass(CreateSimplifyArithPass()); pm.addPass(mlir::createCanonicalizerPass()); pm.addPass(mlir::createCSEPass()); From 33fe3156406ace60347447a4d5fdb3d8a2b29b94 Mon Sep 17 00:00:00 2001 From: Quentin Khan Date: Mon, 13 May 2024 05:22:08 -0700 Subject: [PATCH 017/478] Do not deinitialize XNNPack in `TfLiteXNNPackDelegateWeightsCacheDelete`. 
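The weights cache does not own XNNPack's process-wide state: other caches or
delegates in the same process may still need an initialized library, which is
presumably why the delete path should not call `xnn_deinitialize()`. A sketch
of the hazard, using the public C API:

```
// Both caches share XNNPack's global state. If deleting cache_a called
// xnn_deinitialize(), cache_b (and any live delegate) would be left with a
// deinitialized library; hence the delete path must not deinitialize.
TfLiteXNNPackDelegateWeightsCache* cache_a =
    TfLiteXNNPackDelegateWeightsCacheCreate();
TfLiteXNNPackDelegateWeightsCache* cache_b =
    TfLiteXNNPackDelegateWeightsCacheCreate();
TfLiteXNNPackDelegateWeightsCacheDelete(cache_a);
// cache_b remains usable because XNNPack stays initialized.
TfLiteXNNPackDelegateWeightsCacheDelete(cache_b);
```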
PiperOrigin-RevId: 633174598 --- tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc b/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc index 1503fea039580f..6ed0696d8d9e5b 100644 --- a/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc +++ b/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc @@ -7846,7 +7846,6 @@ void TfLiteXNNPackDelegateWeightsCacheDelete( } auto weights_cache = reinterpret_cast(cache); xnn_delete_weights_cache(weights_cache); - xnn_deinitialize(); } TfLiteXNNPackDelegateOptions TfLiteXNNPackDelegateOptionsDefault() { From ea106b7963a5ec91cf3eab9108f058d695289600 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 May 2024 05:25:26 -0700 Subject: [PATCH 018/478] Update ops-related pbtxt files. PiperOrigin-RevId: 633175341 --- tensorflow/core/ops/ops.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 0556051577f5e4..53c362ed0ab819 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -1,4 +1,4 @@ -go/debugstr +go/debugstr op { name: "Abort" attr { From 676078f7101ea1174eef7646a98ab563bfe3f213 Mon Sep 17 00:00:00 2001 From: Greg Olechwierowicz Date: Mon, 13 May 2024 07:05:18 -0700 Subject: [PATCH 019/478] [XLA:GPU] Add flag for fully unrolling while loops PiperOrigin-RevId: 633196820 --- third_party/xla/xla/debug_options_flags.cc | 2 + .../xla/xla/service/gpu/gpu_compiler.cc | 53 +++++++++++-------- third_party/xla/xla/xla.proto | 14 ++++- 3 files changed, 46 insertions(+), 23 deletions(-) diff --git a/third_party/xla/xla/debug_options_flags.cc b/third_party/xla/xla/debug_options_flags.cc index 3b31ed9bc3f0b7..3eb1a7a2fd4292 100644 --- a/third_party/xla/xla/debug_options_flags.cc +++ b/third_party/xla/xla/debug_options_flags.cc @@ -211,6 +211,8 @@ DebugOptions DefaultDebugOptionsIgnoringFlags() { opts.set_xla_gpu_cublas_fallback(true); opts.set_xla_gpu_cudnn_gemm_fusion_level(0); opts.set_xla_gpu_enable_while_loop_double_buffering(false); + opts.set_xla_gpu_enable_while_loop_unrolling( + DebugOptions::WHILE_LOOP_UNROLLING_NO_UNROLL); opts.set_xla_gpu_ensure_minor_dot_contraction_dims(false); opts.set_xla_gpu_filter_kernels_spilling_registers_on_autotuning(true); opts.set_xla_gpu_llvm_verification_level(0); diff --git a/third_party/xla/xla/service/gpu/gpu_compiler.cc b/third_party/xla/xla/service/gpu/gpu_compiler.cc index 9a861ec2cb0c71..2d33d67d2d6064 100644 --- a/third_party/xla/xla/service/gpu/gpu_compiler.cc +++ b/third_party/xla/xla/service/gpu/gpu_compiler.cc @@ -1066,36 +1066,27 @@ absl::Status RunPostFusionPasses( HloModule* hlo_module, std::function add_custom_kernel_replacement_passes) { + const DebugOptions& opts = hlo_module->config().debug_options(); + HloPassPipeline pipeline("post-fusion optimization"); pipeline.AddPass(); pipeline.AddPass( - hlo_module->config() - .debug_options() - .xla_gpu_all_gather_combine_threshold_bytes(), + opts.xla_gpu_all_gather_combine_threshold_bytes(), /*combine_threshold_count=*/256, - hlo_module->config() - .debug_options() - .xla_gpu_enable_all_gather_combine_by_dim()); + opts.xla_gpu_enable_all_gather_combine_by_dim()); pipeline.AddPass( - hlo_module->config() - .debug_options() - .xla_gpu_all_reduce_combine_threshold_bytes(), + opts.xla_gpu_all_reduce_combine_threshold_bytes(), /*combine_threshold_count=*/256); pipeline.AddPass( - hlo_module->config() - .debug_options() - 
.xla_gpu_reduce_scatter_combine_threshold_bytes(), + opts.xla_gpu_reduce_scatter_combine_threshold_bytes(), /*combine_threshold_count=*/256, - hlo_module->config() - .debug_options() - .xla_gpu_enable_reduce_scatter_combine_by_dim()); + opts.xla_gpu_enable_reduce_scatter_combine_by_dim()); - if (hlo_module->config().debug_options().xla_gpu_all_reduce_contiguous()) { + if (opts.xla_gpu_all_reduce_contiguous()) { pipeline.AddPass(); } - TF_RETURN_IF_ERROR(add_custom_kernel_replacement_passes( - &pipeline, hlo_module->config().debug_options())); + TF_RETURN_IF_ERROR(add_custom_kernel_replacement_passes(&pipeline, opts)); int32_t blueconnect_num_devices_per_host = hlo_module->config() @@ -1105,10 +1096,28 @@ absl::Status RunPostFusionPasses( pipeline.AddPass(blueconnect_num_devices_per_host); } - if (hlo_module->config() - .debug_options() - .xla_gpu_enable_while_loop_double_buffering()) { - pipeline.AddPass(); + std::optional unroll_strategy = + std::nullopt; + // Support old flag. + if (opts.xla_gpu_enable_while_loop_double_buffering()) { + unroll_strategy = + LoopDoubleBufferTransformer::UnrollStrategy::kDoubleBuffer; + } + // Support new flag setting style, override the old one. + if (opts.xla_gpu_enable_while_loop_unrolling() == + DebugOptions::WHILE_LOOP_UNROLLING_DOUBLE_BUFFER) { + unroll_strategy = + LoopDoubleBufferTransformer::UnrollStrategy::kDoubleBuffer; + } + if (opts.xla_gpu_enable_while_loop_unrolling() == + DebugOptions::WHILE_LOOP_UNROLLING_FULL_UNROLL) { + LOG_IF(WARNING, unroll_strategy != std::nullopt) + << "Overriding double buffering set via " + "`xla_gpu_enable_while_loop_double_buffering` flag."; + unroll_strategy = LoopDoubleBufferTransformer::UnrollStrategy::kFullUnroll; + } + if (unroll_strategy != std::nullopt) { + pipeline.AddPass(*unroll_strategy); pipeline.AddPass(); pipeline.AddPass(); } diff --git a/third_party/xla/xla/xla.proto b/third_party/xla/xla/xla.proto index e7055359768113..c49d4a509a2618 100644 --- a/third_party/xla/xla/xla.proto +++ b/third_party/xla/xla/xla.proto @@ -668,6 +668,18 @@ message DebugOptions { // Enable double buffering for loops. bool xla_gpu_enable_while_loop_double_buffering = 248; + enum WhileLoopUnrolling { + WHILE_LOOP_UNROLLING_NO_UNROLL = 0; + // Has the same effect as setting + // `xla_gpu_enable_while_loop_double_buffering`. + WHILE_LOOP_UNROLLING_DOUBLE_BUFFER = 1; + // Enables full loop unrolling using the same strategy as `DOUBLE_BUFFER`. + WHILE_LOOP_UNROLLING_FULL_UNROLL = 2; + } + + // Determine the while loop unrolling scheme. + WhileLoopUnrolling xla_gpu_enable_while_loop_unrolling = 294; + // Change the layout of the second triton dot operand to be column major. // Only works for (bf16 x bf16) -> bf16. bool xla_gpu_ensure_minor_dot_contraction_dims = 249; @@ -767,7 +779,7 @@ message DebugOptions { // Base length to rewrite the reduce window to, no rewrite if set to 0. int64 xla_reduce_window_rewrite_base_length = 293; - // Next id: 294 + // Next id: 295 // Extra options to pass to the compilation backend (e.g. LLVM); specific // interpretation of these values is left to the backend. From b010326f4d779f1540343faf838ee14480c34588 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 May 2024 07:17:38 -0700 Subject: [PATCH 020/478] Update ops-related pbtxt files. 
PiperOrigin-RevId: 633199826 --- tensorflow/core/ops/ops.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 53c362ed0ab819..68d78d0329a9ac 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -1,4 +1,4 @@ -go/debugstr +go/nodeserialize op { name: "Abort" attr { From bd8a712cbdfa268baf2c91bb165d0d9849576389 Mon Sep 17 00:00:00 2001 From: Johannes Reifferscheid Date: Mon, 13 May 2024 07:30:48 -0700 Subject: [PATCH 021/478] Reshape multi-dimensional constants to 1d. The LLVM lowering doesn't support arbitrary shapes. PiperOrigin-RevId: 633203497 --- .../xla/xla/service/gpu/fusions/mlir/lower_tensors.cc | 7 +++++++ .../xla/service/gpu/fusions/mlir/tests/lower_tensors.mlir | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/third_party/xla/xla/service/gpu/fusions/mlir/lower_tensors.cc b/third_party/xla/xla/service/gpu/fusions/mlir/lower_tensors.cc index a529e6d86d00e3..5677b45e5342bf 100644 --- a/third_party/xla/xla/service/gpu/fusions/mlir/lower_tensors.cc +++ b/third_party/xla/xla/service/gpu/fusions/mlir/lower_tensors.cc @@ -334,6 +334,13 @@ mlir::LLVM::GlobalOp CreateGlobalOp(mlir::Attribute value, mlir::ModuleOp module, bool is_constant, int addr_space, mlir::ImplicitLocOpBuilder& b) { + if (auto elements = mlir::dyn_cast_or_null(value)) { + // The lowering to LLVM only works for 1d tensors or those with trailing + // unit dimensions. + value = elements.reshape(mlir::RankedTensorType::get( + {elements.getNumElements()}, elements.getElementType())); + } + Type element_type = shaped_ty.getElementType(); // Needed to support complex element type. mlir::LLVMTypeConverter converter(b.getContext()); diff --git a/third_party/xla/xla/service/gpu/fusions/mlir/tests/lower_tensors.mlir b/third_party/xla/xla/service/gpu/fusions/mlir/tests/lower_tensors.mlir index 707cfc6f34196c..d93f3ecd514c24 100644 --- a/third_party/xla/xla/service/gpu/fusions/mlir/tests/lower_tensors.mlir +++ b/third_party/xla/xla/service/gpu/fusions/mlir/tests/lower_tensors.mlir @@ -163,8 +163,8 @@ module { return %0 : f32 } } -// CHECK: llvm.mlir.global private constant @global_cst_0(dense<[ -// CHECK-SAME: [1.000000e+00], [2.000000e+00]]> : tensor<2x1xf32>) {addr_space = 0 : i32} : !llvm.array<2 x f32> +// CHECK: llvm.mlir.global private constant @global_cst_0(dense< +// CHECK-SAME: [1.000000e+00, 2.000000e+00]> : tensor<2xf32>) {addr_space = 0 : i32} : !llvm.array<2 x f32> // CHECK: @extract_from_constant // CHECK: %[[ADDR_OF:.*]] = llvm.mlir.addressof @global_cst_0 : !llvm.ptr // CHECK: %[[GEP:.*]] = llvm.getelementptr inbounds %[[ADDR_OF]][%{{.*}}] : (!llvm.ptr, i64) -> !llvm.ptr, f32 From ad24c813bcf671fbb42da70efcf3e0ac4fe94a5f Mon Sep 17 00:00:00 2001 From: Ilia Sergachev Date: Mon, 13 May 2024 08:20:14 -0700 Subject: [PATCH 022/478] PR #12255: [GPU] Fix FMHA hangs by moving compilation to thunk initialization. Imported from GitHub PR https://github.com/openxla/xla/pull/12255 Applies the same fix as in https://github.com/openxla/xla/pull/12228 to FMHA. Copybara import of the project: -- 70a42828f86711a0e83a2eb37ee52833e1768187 by Ilia Sergachev : [GPU] Fix FMHA hangs by moving compilation to thunk initialization. 
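Condensed, the fix mirrors #12228: force cudnn execution-plan construction
during thunk initialization so it cannot overlap a running collective. A
simplified excerpt of the `fused_mha_thunk.cc` change in this patch:

```
absl::Status FusedMHAThunk::Initialize(const InitializeParams& params) {
  // GetOrCreateRunner compiles the cudnn execution plan on first use; doing
  // it here keeps ExecuteOnStream free of compilation work that could hang
  // behind a concurrently running NCCL collective.
  se::dnn::LazyOpRunner<se::dnn::FusedMHAOp>* lazy_runner =
      GetOrCreateRunner(params.stream).AsFusedMHARunner();
  TF_ASSIGN_OR_RETURN(auto config, config_.AsDnnFusedMHAOpConfig());
  return lazy_runner->GetOrCreateRunner(config, params.stream).status();
}
```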
Merging this change closes #12255 PiperOrigin-RevId: 633217196 --- .../xla/service/gpu/gpu_fused_mha_runner.cc | 89 ++++++++++--------- .../xla/service/gpu/gpu_fused_mha_runner.h | 8 ++ .../service/gpu/runtime/fused_mha_thunk.cc | 14 +++ .../xla/service/gpu/runtime/fused_mha_thunk.h | 2 + 4 files changed, 70 insertions(+), 43 deletions(-) diff --git a/third_party/xla/xla/service/gpu/gpu_fused_mha_runner.cc b/third_party/xla/xla/service/gpu/gpu_fused_mha_runner.cc index 61be619b1d46d8..09af1e523ab7f1 100644 --- a/third_party/xla/xla/service/gpu/gpu_fused_mha_runner.cc +++ b/third_party/xla/xla/service/gpu/gpu_fused_mha_runner.cc @@ -68,29 +68,13 @@ absl::Status RunFusedMHA(GpufMHAParams params, se::Stream *stream, dropout_rate = *params.config->dropout_rate; } - double scale = 1.0; - if (params.config->fmha_scale) { - scale = *params.config->fmha_scale; - } - std::optional seed; if (params.config->seed) { seed = *params.config->seed; } - TF_ASSIGN_OR_RETURN( - se::dnn::FMHAMaskKind mask_type, - GetDNNFmhaMaskKindFromCudnnFmhaMaskKind(params.config->mask_type)); - se::dnn::FusedMHAOp::Config config{scale, - params.config->lhs_bmm1, - params.config->rhs_bmm1, - params.config->rhs_bmm2, - params.config->intermediate_lhs_bmm2, - params.config->output, - params.config->bias, - params.config->activation, - dropout_rate, - seed, - mask_type}; + + TF_ASSIGN_OR_RETURN(se::dnn::FusedMHAOp::Config config, + params.config->AsDnnFusedMHAOpConfig()); TF_ASSIGN_OR_RETURN(auto *runner, lazy_runner->GetOrCreateRunner(config, stream)); return (*runner)(stream, options.profile_result, scratch_memory, @@ -183,35 +167,13 @@ absl::Status RunFusedMHABackward( dropout_rate = *params.config->dropout_rate; } - double scale = 1.0; - if (params.config->fmha_scale) { - scale = *params.config->fmha_scale; - } - std::optional seed; if (params.config->seed) { seed = *params.config->seed; } - TF_ASSIGN_OR_RETURN( - se::dnn::FMHAMaskKind mask_type, - GetDNNFmhaMaskKindFromCudnnFmhaMaskKind(params.config->mask_type)); - se::dnn::FusedMHABackwardOp::Config config{scale, - params.config->bmm1_grad_gemm1_rhs, - params.config->bmm1_grad_gemm2_rhs, - params.config->bmm2_grad_gemm1_lhs, - params.config->bmm2_grad_gemm2_rhs, - params.config->d_output, - params.config->d_bmm1_lhs, - params.config->d_bmm1_rhs, - params.config->d_bmm2_rhs, - params.config->d_s, - params.config->d_bias, - params.config->fwd_output, - params.config->bias, - dropout_rate, - seed, - mask_type}; + TF_ASSIGN_OR_RETURN(se::dnn::FusedMHABackwardOp::Config config, + params.config->AsDnnFusedMHABackwardOpConfig()); TF_ASSIGN_OR_RETURN(auto *runner, lazy_runner->GetOrCreateRunner(config, stream)); // TODO: pass in real softmax_sum, dQ_accum, fwd_output @@ -404,6 +366,21 @@ absl::Status RunGpuFMHABackwardImpl(const GpufMHABackwardParams ¶ms, return config; } +absl::StatusOr +GpufMHAConfig::AsDnnFusedMHAOpConfig() const { + double scale = 1.0; + if (fmha_scale.has_value()) { + scale = *fmha_scale; + } + TF_ASSIGN_OR_RETURN(se::dnn::FMHAMaskKind mask_type, + GetDNNFmhaMaskKindFromCudnnFmhaMaskKind(mask_type)); + + return se::dnn::FusedMHAOp::Config{ + scale, lhs_bmm1, rhs_bmm1, rhs_bmm2, intermediate_lhs_bmm2, + output, bias, activation, dropout_rate, seed, + mask_type}; +} + /*static*/ absl::StatusOr GpufMHABackwardConfig::For( const GpufMHABackwardDescriptor &desc) { // Get shapes from desc. 
@@ -546,6 +523,32 @@ absl::Status RunGpuFMHABackwardImpl(const GpufMHABackwardParams ¶ms, return config; } +absl::StatusOr +GpufMHABackwardConfig::AsDnnFusedMHABackwardOpConfig() const { + double scale = 1.0; + if (fmha_scale.has_value()) { + scale = *fmha_scale; + } + TF_ASSIGN_OR_RETURN(se::dnn::FMHAMaskKind mask_type, + GetDNNFmhaMaskKindFromCudnnFmhaMaskKind(mask_type)); + return se::dnn::FusedMHABackwardOp::Config{scale, + bmm1_grad_gemm1_rhs, + bmm1_grad_gemm2_rhs, + bmm2_grad_gemm1_lhs, + bmm2_grad_gemm2_rhs, + d_output, + d_bmm1_lhs, + d_bmm1_rhs, + d_bmm2_rhs, + d_s, + d_bias, + fwd_output, + bias, + dropout_rate, + seed, + mask_type}; +} + /*static*/ absl::StatusOr GpufMHAParams::For( const GpufMHAConfig &config, se::DeviceMemoryBase lhs_bmm1_buffer, se::DeviceMemoryBase rhs_bmm1_buffer, se::DeviceMemoryBase rhs_bmm2_buffer, diff --git a/third_party/xla/xla/service/gpu/gpu_fused_mha_runner.h b/third_party/xla/xla/service/gpu/gpu_fused_mha_runner.h index 6538d542984325..7ca35805be251e 100644 --- a/third_party/xla/xla/service/gpu/gpu_fused_mha_runner.h +++ b/third_party/xla/xla/service/gpu/gpu_fused_mha_runner.h @@ -101,10 +101,14 @@ struct GpufMHABackwardDescriptor { std::optional d_bias_shape; std::optional bias_shape; }; + // Structure to describe static properties of a GPU fused Multi-Headed // Attention. struct GpufMHAConfig { static absl::StatusOr For(const GpufMHADescriptor& fmha_desc); + + absl::StatusOr AsDnnFusedMHAOpConfig() const; + PrimitiveType input_type; // Capture the primitive type of one of the inputs of BMM1 PrimitiveType output_type; @@ -133,6 +137,10 @@ struct GpufMHAConfig { struct GpufMHABackwardConfig { static absl::StatusOr For( const GpufMHABackwardDescriptor& fmha_desc); + + absl::StatusOr + AsDnnFusedMHABackwardOpConfig() const; + PrimitiveType input_type; // Capture the primitive type of one of the inputs of BMM1 PrimitiveType output_type; diff --git a/third_party/xla/xla/service/gpu/runtime/fused_mha_thunk.cc b/third_party/xla/xla/service/gpu/runtime/fused_mha_thunk.cc index efa90bfc9feae3..41613f0121e65f 100644 --- a/third_party/xla/xla/service/gpu/runtime/fused_mha_thunk.cc +++ b/third_party/xla/xla/service/gpu/runtime/fused_mha_thunk.cc @@ -65,6 +65,13 @@ std::optional AssignBufferIfNotNull( : std::nullopt; } +absl::Status FusedMHAThunk::Initialize(const InitializeParams& params) { + se::dnn::LazyOpRunner* lazy_runner = + GetOrCreateRunner(params.stream).AsFusedMHARunner(); + TF_ASSIGN_OR_RETURN(auto config, config_.AsDnnFusedMHAOpConfig()); + return lazy_runner->GetOrCreateRunner(config, params.stream).status(); +} + absl::Status FusedMHAThunk::ExecuteOnStream(const ExecuteParams& params) { const auto& buffer_allocations = *params.buffer_allocations; se::DeviceMemoryBase lhs_bmm1_buffer = @@ -143,6 +150,13 @@ FusedMHABackwardThunk::GetOrCreateRunner( return *it->second; } +absl::Status FusedMHABackwardThunk::Initialize(const InitializeParams& params) { + se::dnn::LazyOpRunner* lazy_runner = + GetOrCreateRunner(params.stream).AsFusedMHABackwardRunner(); + TF_ASSIGN_OR_RETURN(auto config, config_.AsDnnFusedMHABackwardOpConfig()); + return lazy_runner->GetOrCreateRunner(config, params.stream).status(); +} + absl::Status FusedMHABackwardThunk::ExecuteOnStream( const ExecuteParams& params) { const auto& buffer_allocations = *params.buffer_allocations; diff --git a/third_party/xla/xla/service/gpu/runtime/fused_mha_thunk.h b/third_party/xla/xla/service/gpu/runtime/fused_mha_thunk.h index 32bfdd0ecd19f4..bf9cff354b5027 100644 --- 
a/third_party/xla/xla/service/gpu/runtime/fused_mha_thunk.h +++ b/third_party/xla/xla/service/gpu/runtime/fused_mha_thunk.h @@ -53,6 +53,7 @@ class FusedMHAThunk : public Thunk { FusedMHAThunk(const FusedMHAThunk&) = delete; FusedMHAThunk& operator=(const FusedMHAThunk&) = delete; + absl::Status Initialize(const InitializeParams& params) override; absl::Status ExecuteOnStream(const ExecuteParams& params) override; private: @@ -101,6 +102,7 @@ class FusedMHABackwardThunk : public Thunk { FusedMHABackwardThunk(const FusedMHABackwardThunk&) = delete; FusedMHABackwardThunk& operator=(const FusedMHABackwardThunk&) = delete; + absl::Status Initialize(const InitializeParams& params) override; absl::Status ExecuteOnStream(const ExecuteParams& params) override; private: From 1135035e5ee99295a1d675d179e4bd433e4192f9 Mon Sep 17 00:00:00 2001 From: Trevor Morris Date: Mon, 13 May 2024 08:31:43 -0700 Subject: [PATCH 023/478] PR #12228: [GPU] Fix hang with cudnn layer norm by moving build phase to Initialize() Imported from GitHub PR https://github.com/openxla/xla/pull/12228 The first time that a NormThunk is executed, it will build a cudnn execution plan. This build step can hang if a NCCL collective is running at the same time. To fix this, I've moved the build step to take place during thunk initialization. We only observe this hang when using cudnn 9. Here's a backtrace from the hang that will be fixed: ``` Thread 585 (Thread 0x7fb9391ff640 (LWP 41364) "main.py"): #0 0x00007fd3d17cffd9 in ?? () from /lib/x86_64-linux-gnu/libc.so.6 #1 0x00007fd3d17da24f in pthread_rwlock_wrlock () from /lib/x86_64-linux-gnu/libc.so.6 #2 0x00007fd070967dfe in ?? () from /lib/x86_64-linux-gnu/libcuda.so.1 #3 0x00007fd0709c928a in ?? () from /lib/x86_64-linux-gnu/libcuda.so.1 #4 0x00007f1970d76102 in ?? () from /lib/x86_64-linux-gnu/libcudnn_engines_precompiled.so.9.1.0 #5 0x00007f1970f2c999 in ?? () from /lib/x86_64-linux-gnu/libcudnn_engines_precompiled.so.9.1.0 #6 0x00007f1970a7d4ab in ?? () from /lib/x86_64-linux-gnu/libcudnn_engines_precompiled.so.9.1.0 #7 0x00007f1970d0a9cb in ?? 
() from /lib/x86_64-linux-gnu/libcudnn_engines_precompiled.so.9.1.0 #8 0x00007fce60b2a98c in cudnn::backend::ExecutionPlan::finalize_internal() () from /lib/x86_64-linux-gnu/libcudnn_graph.so.9.1.0 #9 0x00007fce60aefbb1 in cudnn::backend::Descriptor::finalize() () from /lib/x86_64-linux-gnu/libcudnn_graph.so.9.1.0 #10 0x00007fce60b15bec in cudnnBackendFinalize () from /lib/x86_64-linux-gnu/libcudnn_graph.so.9.1.0 #11 0x00007fd2521b8f39 in cudnn_frontend::ExecutionPlanBuilder_v8::build() () from /usr/local/lib/python3.10/dist-packages/jaxlib/xla_extension.so #12 0x00007fd2521734ba in stream_executor::gpu::(anonymous namespace)::GetExecPlanFromHeuristics(cudnn_frontend::OperationGraph_v8&&, stream_executor::gpu::(anonymous namespace)::CudnnHandle const&, bool) () from /usr/local/lib/python3.10/dist-packages/jaxlib/xla_extension.so #13 0x00007fd25216ff9b in stream_executor::gpu::CudnnSupport::NormRunnerFromDesc(stream_executor::Stream*, stream_executor::dnn::AlgorithmDesc const&, stream_executor::dnn::NormKind, double, stream_executor::dnn::TensorDescriptor const&, stream_executor::dnn::TensorDescriptor const&, stream_executor::dnn::TensorDescriptor const&, std::optional, std::optional, std::optional, std::optional, std::optional, std::optional) () from /usr/local/lib/python3.10/dist-packages/jaxlib/xla_extension.so #14 0x00007fd24e36b88b in stream_executor::dnn::NormOp::RunnerFromAlgorithmDesc(stream_executor::dnn::AlgorithmDesc const&, stream_executor::dnn::NormOp::Config, stream_executor::Stream*) () from /usr/local/lib/python3.10/dist-packages/jaxlib/xla_extension.so #15 0x00007fd24e36ae37 in stream_executor::dnn::LazyOpRunner::GetOrCreateRunner(stream_executor::dnn::NormOp::Config, stream_executor::Stream*)::{lambda()#1}::operator()() const () from /usr/local/lib/python3.10/dist-packages/jaxlib/xla_extension.so #16 0x00007fd24e36adbc in void absl::lts_20230802::base_internal::CallOnceImpl::GetOrCreateRunner(stream_executor::dnn::NormOp::Config, stream_executor::Stream*)::{lambda()#1}>(std::atomic*, absl::lts_20230802::base_internal::SchedulingMode, stream_executor::dnn::LazyOpRunner::GetOrCreateRunner(stream_executor::dnn::NormOp::Config, stream_executor::Stream*)::{lambda()#1}&&) () from /usr/local/lib/python3.10/dist-packages/jaxlib/xla_extension.so #17 0x00007fd24e36a9bd in stream_executor::dnn::LazyOpRunner::GetOrCreateRunner(stream_executor::dnn::NormOp::Config, stream_executor::Stream*) () from /usr/local/lib/python3.10/dist-packages/jaxlib/xla_extension.so #18 0x00007fd24e369d29 in xla::gpu::RunGpuNorm(xla::gpu::GpuNormConfig const&, stream_executor::DeviceMemoryBase const&, stream_executor::DeviceMemoryBase const&, stream_executor::DeviceMemoryBase const&, std::optional, std::optional, std::optional, std::optional, std::optional, std::optional, stream_executor::DeviceMemoryBase const&, stream_executor::Stream*, xla::gpu::RunNormOptions) () from /usr/local/lib/python3.10/dist-packages/jaxlib/xla_extension.so #19 0x00007fd24e368be6 in xla::gpu::NormThunk::ExecuteOnStream(xla::gpu::Thunk::ExecuteParams const&) () from /usr/local/lib/python3.10/dist-packages/jaxlib/xla_extension.so ``` Copybara import of the project: -- f53533087ba1ddcf65ad7cc6268ee89de4690d15 by Trevor Morris : Fix hang with cudnn layer norm by moving cudnn init to Initialize() Merging this change closes #12228 PiperOrigin-RevId: 633220207 --- .../xla/xla/service/gpu/gpu_norm_runner.cc | 18 ++---------------- .../xla/xla/service/gpu/gpu_norm_runner.h | 16 ++++++++++++++++ 
.../xla/xla/service/gpu/runtime/norm_thunk.cc | 9 +++++++++ .../xla/xla/service/gpu/runtime/norm_thunk.h | 1 + 4 files changed, 28 insertions(+), 16 deletions(-) diff --git a/third_party/xla/xla/service/gpu/gpu_norm_runner.cc b/third_party/xla/xla/service/gpu/gpu_norm_runner.cc index 5abb58af4e019d..9170de6016e60a 100644 --- a/third_party/xla/xla/service/gpu/gpu_norm_runner.cc +++ b/third_party/xla/xla/service/gpu/gpu_norm_runner.cc @@ -44,22 +44,8 @@ absl::Status RunGpuNorm(const gpu::GpuNormConfig& config, se::Stream* stream, RunNormOptions options) { se::dnn::LazyOpRunner* lazy_runner = options.norm_runner->AsNormRunner(); - std::optional> local_runner; - - TF_ASSIGN_OR_RETURN(se::dnn::NormKind kind, - GetDNNNormKindFromCudnnNormKind(config.kind)); - - se::dnn::NormOp::Config ln_config{kind, - config.epsilon, - config.x_descriptor, - config.scale_descriptor, - config.y_or_dx_descriptor, - config.bias_descriptor, - config.dy_descriptor, - config.expectation_descriptor, - config.norm_factor_descriptor, - config.dscale_descriptor, - config.dbias_descriptor}; + TF_ASSIGN_OR_RETURN(se::dnn::NormOp::Config ln_config, + config.AsDnnNormOpConfig()); TF_ASSIGN_OR_RETURN(auto* runner, lazy_runner->GetOrCreateRunner(ln_config, stream)); diff --git a/third_party/xla/xla/service/gpu/gpu_norm_runner.h b/third_party/xla/xla/service/gpu/gpu_norm_runner.h index 854e3c0892050c..8461671e86d037 100644 --- a/third_party/xla/xla/service/gpu/gpu_norm_runner.h +++ b/third_party/xla/xla/service/gpu/gpu_norm_runner.h @@ -118,6 +118,22 @@ struct GpuNormConfig { return config; } + absl::StatusOr AsDnnNormOpConfig() const { + TF_ASSIGN_OR_RETURN(se::dnn::NormKind norm_kind, + GetDNNNormKindFromCudnnNormKind(kind)); + return se::dnn::NormOp::Config{norm_kind, + epsilon, + x_descriptor, + scale_descriptor, + y_or_dx_descriptor, + bias_descriptor, + dy_descriptor, + expectation_descriptor, + norm_factor_descriptor, + dscale_descriptor, + dbias_descriptor}; + } + double epsilon; CudnnNormKind kind; se::dnn::AlgorithmDesc algorithm; diff --git a/third_party/xla/xla/service/gpu/runtime/norm_thunk.cc b/third_party/xla/xla/service/gpu/runtime/norm_thunk.cc index d3862f7bfeac74..71c0744686e402 100644 --- a/third_party/xla/xla/service/gpu/runtime/norm_thunk.cc +++ b/third_party/xla/xla/service/gpu/runtime/norm_thunk.cc @@ -106,5 +106,14 @@ absl::Status NormThunk::ExecuteOnStream(const ExecuteParams& params) { return absl::OkStatus(); } +absl::Status NormThunk::Initialize(const InitializeParams& params) { + // Create the runner at initialization time to avoid hangs if we try to build + // the execution plan while a NCCL collective is running. 
+ se::dnn::LazyOpRunner* lazy_runner = + GetOrCreateRunner(params.stream).AsNormRunner(); + TF_ASSIGN_OR_RETURN(auto ln_config, config_.AsDnnNormOpConfig()); + return lazy_runner->GetOrCreateRunner(ln_config, params.stream).status(); +} + } // namespace gpu } // namespace xla diff --git a/third_party/xla/xla/service/gpu/runtime/norm_thunk.h b/third_party/xla/xla/service/gpu/runtime/norm_thunk.h index 602d504175fb3d..eca5deca3a68b7 100644 --- a/third_party/xla/xla/service/gpu/runtime/norm_thunk.h +++ b/third_party/xla/xla/service/gpu/runtime/norm_thunk.h @@ -49,6 +49,7 @@ class NormThunk : public Thunk { NormThunk& operator=(const NormThunk&) = delete; absl::Status ExecuteOnStream(const ExecuteParams& params) override; + absl::Status Initialize(const InitializeParams& params) override; private: BufferAllocation::Slice x_buffer_; From f83f81c2cd4e57e1d6588ced9bbcb2cab011bd26 Mon Sep 17 00:00:00 2001 From: Vladyslav Tsilytskyi Date: Mon, 13 May 2024 08:44:40 -0700 Subject: [PATCH 024/478] Reverts 28bf7478c8481648dc3900e0ab52a5e0887f5145 PiperOrigin-RevId: 633223628 --- .../xla/xla/stream_executor/host/BUILD | 63 ++- .../host/host_execution_engine.cc | 390 ++++++++++++++++++ .../host/host_execution_engine.h | 157 +++++++ .../xla/stream_executor/host/host_executor.cc | 54 ++- .../xla/stream_executor/host/host_executor.h | 11 +- .../xla/stream_executor/host/host_kernel.cc | 5 +- .../xla/stream_executor/host/host_kernel.h | 37 +- .../stream_executor/host/host_kernel_test.cc | 67 +++ 8 files changed, 768 insertions(+), 16 deletions(-) create mode 100644 third_party/xla/xla/stream_executor/host/host_execution_engine.cc create mode 100644 third_party/xla/xla/stream_executor/host/host_execution_engine.h diff --git a/third_party/xla/xla/stream_executor/host/BUILD b/third_party/xla/xla/stream_executor/host/BUILD index 43af63f32502bf..da6b0d35ba7d5b 100644 --- a/third_party/xla/xla/stream_executor/host/BUILD +++ b/third_party/xla/xla/stream_executor/host/BUILD @@ -1,6 +1,14 @@ # Description: # Host-platform specific StreamExecutor support code. 
+load( + "@local_tsl//tsl/platform:build_config_root.bzl", + "if_llvm_aarch32_available", + "if_llvm_aarch64_available", + "if_llvm_powerpc_available", + "if_llvm_system_z_available", + "if_llvm_x86_available", +) load("@local_tsl//tsl/platform:rules_cc.bzl", "cc_library") load("//xla:xla.bzl", "xla_cc_test") load("//xla/stream_executor:build_defs.bzl", "stream_executor_friends") @@ -81,15 +89,60 @@ cc_library( hdrs = ["host_kernel_c_api.h"], ) +cc_library( + name = "host_execution_engine", + srcs = ["host_execution_engine.cc"], + hdrs = ["host_execution_engine.h"], + deps = [ + ":host_kernel_c_api", + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/types:span", + "@llvm-project//llvm:Analysis", + "@llvm-project//llvm:AsmParser", + "@llvm-project//llvm:Core", + "@llvm-project//llvm:ExecutionEngine", + "@llvm-project//llvm:JITLink", + "@llvm-project//llvm:OrcJIT", + "@llvm-project//llvm:OrcShared", + "@llvm-project//llvm:Passes", + "@llvm-project//llvm:Support", + "@llvm-project//llvm:Target", + "@llvm-project//llvm:TargetParser", + "@llvm-project//llvm:TransformUtils", + "@llvm-project//llvm:ir_headers", + "@local_tsl//tsl/platform:statusor", + ] + if_llvm_aarch32_available([ + "@llvm-project//llvm:ARMAsmParser", + "@llvm-project//llvm:ARMCodeGen", + ]) + if_llvm_aarch64_available([ + "@llvm-project//llvm:AArch64AsmParser", + "@llvm-project//llvm:AArch64CodeGen", + ]) + if_llvm_powerpc_available([ + "@llvm-project//llvm:PowerPCAsmParser", + "@llvm-project//llvm:PowerPCCodeGen", + ]) + if_llvm_system_z_available([ + "@llvm-project//llvm:SystemZAsmParser", + "@llvm-project//llvm:SystemZCodeGen", + ]) + if_llvm_x86_available([ + "@llvm-project//llvm:X86AsmParser", + "@llvm-project//llvm:X86CodeGen", + ]), +) + cc_library( name = "host_kernel", srcs = ["host_kernel.cc"], hdrs = ["host_kernel.h"], deps = [ + ":host_execution_engine", ":host_kernel_c_api", "//xla/stream_executor", "//xla/stream_executor:device_memory", - "//xla/stream_executor:stream_executor_interface", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/types:span", @@ -102,9 +155,12 @@ xla_cc_test( deps = [ ":host_kernel", ":host_kernel_c_api", + ":host_platform", "//xla/stream_executor", "//xla/stream_executor:device_memory", + "@com_google_absl//absl/types:span", "@local_tsl//tsl/lib/core:status_test_util", + "@local_tsl//tsl/platform:statusor", "@local_tsl//tsl/platform:test", "@local_tsl//tsl/platform:test_main", ], @@ -119,17 +175,22 @@ cc_library( "host_executor.h", ], deps = [ + ":host_execution_engine", + ":host_kernel", ":host_stream", "//xla/stream_executor", "//xla/stream_executor:event_interface", "//xla/stream_executor:stream_executor_interface", "@com_google_absl//absl/functional:any_invocable", + "@com_google_absl//absl/log", "@com_google_absl//absl/log:check", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/types:span", "@local_tsl//tsl/platform:platform_port", + "@local_tsl//tsl/platform:statusor", "@local_tsl//tsl/platform/profile_utils:profile_utils_cpu_utils", ], alwayslink = True, diff --git a/third_party/xla/xla/stream_executor/host/host_execution_engine.cc 
b/third_party/xla/xla/stream_executor/host/host_execution_engine.cc new file mode 100644 index 00000000000000..f48203bfb3166c --- /dev/null +++ b/third_party/xla/xla/stream_executor/host/host_execution_engine.cc @@ -0,0 +1,390 @@ +/* Copyright 2024 The OpenXLA Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "xla/stream_executor/host/host_execution_engine.h" + +#include +#include +#include +#include +#include +#include + +#include "absl/log/check.h" +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "absl/strings/str_format.h" +#include "absl/strings/string_view.h" +#include "absl/synchronization/mutex.h" +#include "absl/types/span.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/CGSCCPassManager.h" +#include "llvm/Analysis/LoopAnalysisManager.h" +#include "llvm/AsmParser/Parser.h" +#include "llvm/ExecutionEngine/JITEventListener.h" +#include "llvm/ExecutionEngine/ObjectCache.h" +#include "llvm/ExecutionEngine/Orc/CompileUtils.h" +#include "llvm/ExecutionEngine/Orc/Core.h" +#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" +#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h" +#include "llvm/ExecutionEngine/Orc/LLJIT.h" +#include "llvm/ExecutionEngine/Orc/Mangling.h" +#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h" +#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h" +#include "llvm/ExecutionEngine/Orc/TaskDispatch.h" +#include "llvm/ExecutionEngine/Orc/ThreadSafeModule.h" +#include "llvm/ExecutionEngine/SectionMemoryManager.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Passes/OptimizationLevel.h" +#include "llvm/Passes/PassBuilder.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/TargetParser/Triple.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "tsl/platform/statusor.h" + +namespace stream_executor { +namespace host { + +using absl::InternalError; +using absl::StatusOr; +using absl::StrFormat; + +using llvm::Expected; +using llvm::MemoryBuffer; +using llvm::SectionMemoryManager; +using llvm::Triple; + +using llvm::orc::ExecutionSession; +using llvm::orc::ExecutorAddr; +using llvm::orc::InPlaceTaskDispatcher; +using llvm::orc::IRCompileLayer; +using llvm::orc::JITTargetMachineBuilder; +using llvm::orc::RTDyldObjectLinkingLayer; +using llvm::orc::SelfExecutorProcessControl; +using llvm::orc::SimpleCompiler; +using llvm::orc::SymbolMap; +using llvm::orc::ThreadSafeModule; + +namespace { + +// This compiler keeps weak pointers to the TargetMachine and the ObjectCache. +// +// This allows releasing the memory of those objects, even though the LLJIT +// keeps the compiler alive. 
+// +// We wrote this class based on the code of llvm::orc::ConcurrentIRCompiler. +class WeakCompiler : public IRCompileLayer::IRCompiler { + public: + static llvm::orc::IRSymbolMapper::ManglingOptions + IrManglingOptionsForWeakTargetMachine( + std::weak_ptr weak_target_machine) { + std::shared_ptr target_machine = + weak_target_machine.lock(); + CHECK(target_machine != nullptr) + << "Compiler should not be used after the TargetMachine is destroyed."; + + return llvm::orc::irManglingOptionsFromTargetOptions( + target_machine->Options); + } + + // It's not recommended to allocate the parameters with std::make_shared, + // because that would allocate the object and the control block in one + // allocation, so the weak_ptr would keep alive the memory of the object as + // well. + explicit WeakCompiler(std::weak_ptr weak_target_machine) + : IRCompiler(IrManglingOptionsForWeakTargetMachine(weak_target_machine)), + weak_target_machine_(std::move(weak_target_machine)) {} + + Expected> operator()( + llvm::Module &module) override { + std::shared_ptr target_machine = + weak_target_machine_.lock(); + CHECK(target_machine != nullptr) + << "Compiler should not be used after the TargetMachine is destroyed."; + + SimpleCompiler compiler(*target_machine); + return compiler(module); + } + + private: + std::weak_ptr weak_target_machine_; +}; + +} // namespace + +RuntimeExecutionEngine::RuntimeExecutionEngine(bool enable_gdb_listener, + bool enable_perf_listener) { + if (enable_gdb_listener) + gdb_listener_ = llvm::JITEventListener::createGDBRegistrationListener(); + if (enable_perf_listener) + perf_listener_ = llvm::JITEventListener::createPerfJITEventListener(); +} + +std::unique_ptr RuntimeExecutionEngine::obj_file() const { + return obj_file_ ? MemoryBuffer::getMemBuffer(obj_file_->getMemBufferRef()) + : nullptr; +} + +// -------------------------------------------------------------------------- // + +namespace { +using llvm::DenseMap; + +// Intercept object compilation to save the object file corresponding to the +// XLA executable in the execution engine. +class ExecutionEngineObjectCache : public llvm::ObjectCache { + public: + void notifyObjectCompiled(const llvm::Module *m, + llvm::MemoryBufferRef objBuffer) override; + + std::unique_ptr getObject(const llvm::Module *m) override; + + // Transfer memory buffer from the cache to the caller. + std::unique_ptr stealObject(const llvm::Module *m); + + private: + DenseMap> objs_; +}; +} // namespace + +void ExecutionEngineObjectCache::notifyObjectCompiled( + const llvm::Module *m, llvm::MemoryBufferRef objBuffer) { + objs_[m] = llvm::MemoryBuffer::getMemBufferCopy( + objBuffer.getBuffer(), objBuffer.getBufferIdentifier()); +} + +std::unique_ptr ExecutionEngineObjectCache::getObject( + const llvm::Module *m) { + auto it = objs_.find(m); + if (it == objs_.end()) return nullptr; + return llvm::MemoryBuffer::getMemBuffer(it->second->getMemBufferRef()); +} + +std::unique_ptr ExecutionEngineObjectCache::stealObject( + const llvm::Module *m) { + auto it = objs_.find(m); + if (it == objs_.end()) return nullptr; + return std::move(it->second); +} + +// -------------------------------------------------------------------------- // + +// llvm_ir::DumpToString() is not used here, because we don't want to add too +// many dependencies to the runtime. 
+static std::string ToString(const llvm::Error &err) {
+  std::string str;
+  llvm::raw_string_ostream(str) << err;
+  return str;
+}
+
+/*static*/ StatusOr<std::unique_ptr<RuntimeExecutionEngine>>
+RuntimeExecutionEngine::CreateFromModule(
+    std::unique_ptr<llvm::LLVMContext> ctx,
+    std::unique_ptr<llvm::Module> module, JitOptions options,
+    absl::Span<const std::string_view> exported) {
+  auto engine =
+      std::unique_ptr<RuntimeExecutionEngine>(new RuntimeExecutionEngine(
+          options.enable_gdb_listener, options.enable_perf_listener));
+
+  // We'll need the module pointer later to look up the object file in the cache.
+  llvm::Module *module_ptr = module.get();
+
+  // Set up the target machine details.
+  if (!options.target_machine)
+    return InternalError("target machine was not provided");
+  module->setDataLayout(options.target_machine->createDataLayout());
+  module->setTargetTriple(options.target_machine->getTargetTriple().str());
+
+  // Run an optimization pipeline over the LLVM module (always run with default
+  // opt level independent of the options).
+  //
+  // TODO(ezhulenev): We should have our own optimizing transformer pipelines
+  // for different Xla backends, e.g. there is absolutely no need to run the
+  // SLP vectorizer for an Xla Gpu host side executable.
+  auto transformer =
+      options.make_optimizing_transformer(options.target_machine.get());
+  if (auto err = transformer(module_ptr))
+    return InternalError(
+        StrFormat("failed to run optimization pipeline: %s", ToString(err)));
+
+  // Callback to create the object layer with a user-provided section memory
+  // mapper and JIT event listeners.
+  auto obj_layer_creator = [&](ExecutionSession &session, const Triple &tt) {
+    auto obj_layer = std::make_unique<RTDyldObjectLinkingLayer>(
+        session, [section_memory_mapper = options.section_memory_mapper]() {
+          return std::make_unique<SectionMemoryManager>(section_memory_mapper);
+        });
+
+    // Register JIT event listeners if they are enabled.
+    if (engine->gdb_listener_)
+      obj_layer->registerJITEventListener(*engine->gdb_listener_);
+    if (engine->perf_listener_)
+      obj_layer->registerJITEventListener(*engine->perf_listener_);
+
+    return obj_layer;
+  };
+
+  // Callback to compile IR module on demand.
+  auto compile_function_creator =
+      [weak_target_machine = std::weak_ptr<llvm::TargetMachine>(
+           options.target_machine)](JITTargetMachineBuilder)
+      -> Expected<std::unique_ptr<IRCompileLayer::IRCompiler>> {
+    return std::make_unique<WeakCompiler>(weak_target_machine);
+  };
+
+  // Use in-process executor process control with in-place task dispatcher.
+  auto executorProcessControl = SelfExecutorProcessControl::Create(
+      nullptr, std::make_unique<InPlaceTaskDispatcher>());
+
+  if (auto err = executorProcessControl.takeError())
+    return InternalError(StrFormat(
+        "failed to create executor process control: %s", ToString(err)));
+
+  // TODO(b/286475799): Concurrent compilation leads to spurious memory
+  // corruptions and segfaults at run time, however nothing shows up in tsan
+  // or asan builds. This is a hack that for some unknown reason helps.
+  static auto *lljit_mu = new absl::Mutex();
+  std::optional<absl::MutexLock> lljit_lock(lljit_mu);
+
+  // Construct the LLJIT with the given compiler and object linking layers.
+  auto jit = llvm::orc::LLJITBuilder()
+                 .setCompileFunctionCreator(std::move(compile_function_creator))
+                 .setObjectLinkingLayerCreator(obj_layer_creator)
+                 .setExecutorProcessControl(std::move(*executorProcessControl))
+                 .setNumCompileThreads(0)  // disable multi-threading
+                 .create();
+
+  if (auto err = jit.takeError())
+    return InternalError(
+        StrFormat("failed to construct LLJIT: %s", ToString(err)));
+
+  lljit_lock.reset();
+
+  // Register input module with the LLJIT.
+ ThreadSafeModule tsm(std::move(module), std::move(ctx)); + if (auto err = (*jit)->addIRModule(std::move(tsm))) + return InternalError( + StrFormat("failed to add source module: %s", ToString(err))); + + llvm::DataLayout data_layout = (*jit)->getDataLayout(); + + // Resolve all exported functions to function pointers. + for (std::string_view name : exported) { + // Trigger compilation by looking up the exported function. + // TODO(tsilytskyi): + // - Do we need to mangle function name? + // - Do we need to verify/adapt function proto to expected API? + Expected addr = (*jit)->lookup(name); + if (auto err = addr.takeError()) + return InternalError(StrFormat( + "failed to compile exported function %s: %s", name, ToString(err))); + + // Check that we found an address of an exported function. + auto ptr = addr->toPtr(); + if (!ptr) + return InternalError( + StrFormat("exported function %s resolved to null", name)); + + engine->exported_.push_back(ptr); + } + + // Fill remaining fields and return constructed ExecutionEngine to the caller. + engine->jit_ = std::move(*jit); + return std::move(engine); +} + +static std::function +MakeOptimizingTransformerForJit(llvm::TargetMachine *targetMachine) { + return [targetMachine](llvm::Module *m) -> llvm::Error { + llvm::LoopAnalysisManager lam; + llvm::FunctionAnalysisManager fam; + llvm::CGSCCAnalysisManager cgam; + llvm::ModuleAnalysisManager mam; + + llvm::PipelineTuningOptions tuningOptions; + // LLVM's loop unrolling isn't well tuned for the loops we emit. Turn it off + // as it consumes compile time with little benefit. + tuningOptions.LoopUnrolling = false; + // Vectorization happens at the MLIR level. + tuningOptions.LoopVectorization = false; + llvm::PassBuilder pb(targetMachine, tuningOptions); + + pb.registerModuleAnalyses(mam); + pb.registerCGSCCAnalyses(cgam); + pb.registerFunctionAnalyses(fam); + pb.registerLoopAnalyses(lam); + pb.crossRegisterProxies(lam, fam, cgam, mam); + + llvm::ModulePassManager mpm; + mpm.addPass(pb.buildPerModuleDefaultPipeline(llvm::OptimizationLevel::O2)); + mpm.run(*m, mam); + return llvm::Error::success(); + }; +} + +absl::StatusOr> +LlvmExecutionEngine::CreateFromLlvmIr(absl::string_view name, + absl::string_view entry, + absl::string_view ir, + absl::Span options) { + llvm::InitializeNativeTarget(); + llvm::InitializeNativeTargetAsmPrinter(); + auto llvm_ctx = std::make_unique(); + llvm::SMDiagnostic diagnostic; + llvm::MemoryBufferRef ir_buffer(ir, name); + std::unique_ptr llvm_module = + llvm::parseAssembly(ir_buffer, diagnostic, *llvm_ctx, nullptr); + + // Prepare JIT target machine for code generation. + auto builder = llvm::orc::JITTargetMachineBuilder::detectHost(); + if (!builder) return absl::InternalError(toString(builder.takeError())); + + llvm::Expected> target_machine = + builder->createTargetMachine(); + if (!target_machine) + return absl::InternalError(toString(target_machine.takeError())); + + // Set target triple + llvm_module->setTargetTriple( + llvm::StringRef(target_machine.get()->getTargetTriple().getTriple())); + + // Construct options for the XLA runtime execution engine. + RuntimeExecutionEngine::JitOptions engine_options; + engine_options.target_machine = std::move(target_machine.get()); + engine_options.make_optimizing_transformer = MakeOptimizingTransformerForJit; + + std::vector exported = {entry}; + + // Compile input module to the native function. 
+ TF_ASSIGN_OR_RETURN(auto engine, + RuntimeExecutionEngine::CreateFromModule( + std::move(llvm_ctx), std::move(llvm_module), + std::move(engine_options), exported)); + + return std::unique_ptr( + new LlvmExecutionEngine(std::move(engine))); +} + +} // namespace host +} // namespace stream_executor diff --git a/third_party/xla/xla/stream_executor/host/host_execution_engine.h b/third_party/xla/xla/stream_executor/host/host_execution_engine.h new file mode 100644 index 00000000000000..9215ad5ed5e3af --- /dev/null +++ b/third_party/xla/xla/stream_executor/host/host_execution_engine.h @@ -0,0 +1,157 @@ +/* Copyright 2024 The OpenXLA Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef XLA_STREAM_EXECUTOR_HOST_HOST_EXECUTION_ENGINE_H_ +#define XLA_STREAM_EXECUTOR_HOST_HOST_EXECUTION_ENGINE_H_ + +#include +#include +#include +#include +#include + +#include "absl/status/statusor.h" +#include "absl/strings/string_view.h" +#include "absl/types/span.h" +#include "llvm/ExecutionEngine/JITEventListener.h" +#include "llvm/ExecutionEngine/Orc/LLJIT.h" +#include "llvm/ExecutionEngine/SectionMemoryManager.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Target/TargetMachine.h" +#include "xla/stream_executor/host/host_kernel_c_api.h" + +namespace stream_executor::host { + +class RuntimeExecutionEngine { + public: + using ExportedFunctionPtr = const SE_HOST_Kernel *; + + // Callback to run optimization passes on the compiled LLVM module. + using OptimizingTransformer = std::function; + + // Callback to construct an optimizing transformer for the given options. + using MakeOptimizingTransformer = + std::function; + + //------------------------------------------------------------------------- // + // Options for creating execution engine from an LLVM module. + //------------------------------------------------------------------------- // + + struct JitOptions { + // User-provided codegen optimization level. + llvm::CodeGenOptLevel opt_level = llvm::CodeGenOptLevel::Default; + + // User-provided target machine specification. + std::shared_ptr target_machine = nullptr; + + // User-provided builder for the optimizing transformer. + MakeOptimizingTransformer make_optimizing_transformer; + + // User-provided memory mapper for allocating memory for executables. + llvm::SectionMemoryManager::MemoryMapper *section_memory_mapper = nullptr; + + // Notify the llvm's global GDB notifications listener. + bool enable_gdb_listener = false; + + // Notify the llvm's global Perf notifications listener. + bool enable_perf_listener = false; + }; + + // Creates a new execution engine by compiling the provided LLVM module to + // a native executable using LLVM ORC stack. 
+  static absl::StatusOr<std::unique_ptr<RuntimeExecutionEngine>>
+  CreateFromModule(std::unique_ptr<llvm::LLVMContext> ctx,
+                   std::unique_ptr<llvm::Module> module, JitOptions options,
+                   absl::Span<const std::string_view> exported);
+
+  //------------------------------------------------------------------------- //
+
+  // Returns pointers to the exported functions.
+  absl::Span<const ExportedFunctionPtr> exported() const { return exported_; }
+
+  ExportedFunctionPtr exported(unsigned ordinal) const {
+    return exported_[ordinal];
+  }
+
+  // Returns a memory buffer with an object file behind this execution engine.
+  // Can be null if the execution engine didn't save the compiled object file.
+  std::unique_ptr<llvm::MemoryBuffer> obj_file() const;
+
+ private:
+  RuntimeExecutionEngine(bool enable_gdb_listener, bool enable_perf_listener);
+
+  // We build the execution engine on top of the ORC LLJIT API, which owns all
+  // compiled/loaded object files and does the linking at run time.
+  //
+  // TODO(ezhulenev): Instead of keeping LLJIT alive we should be able to keep
+  // only llvm::orc::JITDylibSP owning main dylib and the object layer owning
+  // memory-mapped regions holding object files. Once we are done with
+  // executable compilation this jit is defunct because it holds an expired
+  // weak_ptr to an llvm::orc::TargetMachine instance.
+  std::unique_ptr<llvm::orc::LLJIT> jit_;
+
+  // Pointers to resolved exported functions. Indexed by function ordinal.
+  std::vector<ExportedFunctionPtr> exported_;
+
+  // Object file behind the compiled executable. Can be null.
+  std::unique_ptr<llvm::MemoryBuffer> obj_file_;
+
+  llvm::JITEventListener *gdb_listener_ = nullptr;
+  llvm::JITEventListener *perf_listener_ = nullptr;
+};
+
+// Virtual base class that owns a jit-compiled function.
+class HostExecutionEngine {
+ public:
+  virtual ~HostExecutionEngine() = default;
+  virtual SE_HOST_Kernel *kernel() const = 0;
+};
+
+class LlvmExecutionEngine : public HostExecutionEngine {
+ public:
+  SE_HOST_Kernel *kernel() const override { return kernel_; }
+  // TODO(tsilytskyi): clean up kernel_
+  ~LlvmExecutionEngine() override = default;
+
+  static absl::StatusOr<std::unique_ptr<LlvmExecutionEngine>> CreateFromLlvmIr(
+      absl::string_view name, absl::string_view entry, absl::string_view ir,
+      absl::Span<std::string> options);
+
+ private:
+  explicit LlvmExecutionEngine(
+      std::unique_ptr<RuntimeExecutionEngine> exec_engine)
+      : engine_(std::move(exec_engine)) {
+    kernel_ = reinterpret_cast<SE_HOST_Kernel *>(engine_->exported(0));
+  };
+  std::unique_ptr<RuntimeExecutionEngine> engine_;
+  SE_HOST_Kernel *kernel_;
+};
+
+class CppExecutionEngine : public HostExecutionEngine {
+ public:
+  ~CppExecutionEngine() override = default;
+  SE_HOST_Kernel *kernel() const override { return kernel_; }
+
+ private:
+  CppExecutionEngine() = default;
+  SE_HOST_Kernel *kernel_ = nullptr;
+};
+
+}  // namespace stream_executor::host
+
+#endif  // XLA_STREAM_EXECUTOR_HOST_HOST_EXECUTION_ENGINE_H_
diff --git a/third_party/xla/xla/stream_executor/host/host_executor.cc b/third_party/xla/xla/stream_executor/host/host_executor.cc
index 8d6eb9dcdcf45e..3d59b1c69ea587 100644
--- a/third_party/xla/xla/stream_executor/host/host_executor.cc
+++ b/third_party/xla/xla/stream_executor/host/host_executor.cc
@@ -22,24 +22,31 @@ limitations under the License.
#include #include +#include #include #include "absl/functional/any_invocable.h" #include "absl/log/check.h" +#include "absl/log/log.h" #include "absl/status/status.h" #include "absl/status/statusor.h" -#include "absl/strings/numbers.h" -#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" #include "absl/synchronization/notification.h" +#include "absl/types/span.h" #include "xla/stream_executor/device_description.h" #include "xla/stream_executor/device_memory.h" #include "xla/stream_executor/event.h" #include "xla/stream_executor/event_interface.h" +#include "xla/stream_executor/host/host_execution_engine.h" +#include "xla/stream_executor/host/host_kernel.h" #include "xla/stream_executor/host/host_stream.h" +#include "xla/stream_executor/kernel_spec.h" +#include "xla/stream_executor/launch_dim.h" #include "xla/stream_executor/stream_executor.h" #include "xla/stream_executor/stream_executor_interface.h" #include "tsl/platform/mem.h" #include "tsl/platform/profile_utils/cpu_utils.h" +#include "tsl/platform/statusor.h" namespace stream_executor { namespace host { @@ -51,6 +58,49 @@ HostStream* AsHostStream(Stream* stream) { absl::Status HostExecutor::Init() { return absl::OkStatus(); } +absl::StatusOr> HostExecutor::CreateKernel() { + return std::make_unique(); +} + +absl::Status HostExecutor::GetKernel(const MultiKernelLoaderSpec& spec, + Kernel* kernel) { + HostKernel* host_kernel = AsHostKernel(kernel); + host_kernel->SetArity(spec.arity()); + + VLOG(3) << "GetKernel on kernel " << kernel << " : " << kernel->name(); + + if (spec.has_llvm_host_kernel()) { + const LlvmHostKernel& llvm_host_kernel = spec.llvm_host_kernel(); + const absl::string_view name = llvm_host_kernel.kernel_name(); + const absl::string_view entry = llvm_host_kernel.entrypoint(); + const absl::string_view ir = llvm_host_kernel.ir(); + const absl::Span options = llvm_host_kernel.options(); + + TF_ASSIGN_OR_RETURN( + auto execution_engine, + LlvmExecutionEngine::CreateFromLlvmIr(name, entry, ir, options)); + host_kernel->SetExecutionEngine(std::move(execution_engine)); + return absl::OkStatus(); + } else if (false /* TODO(tsilytskyi): Implement CppHostKernel */) { + // host_kernel->SetExecutionEngine(std::make_unique()); + } else { + return absl::InternalError("No method of loading host kernel provided"); + } + + return absl::UnimplementedError("Not Implemented"); +} + +absl::Status HostExecutor::Launch(Stream* stream, const ThreadDim& thread_dims, + const BlockDim& block_dims, + const Kernel& kernel, + const KernelArgs& args) { + // const HostKernel* host_kernel = AsHostKernel(&kernel); + + // TODO(tsilytskyi): convert args into proper format + // host_kernel->Launch(thread_dims, args); + return absl::UnimplementedError("Not Implemented"); +} + bool HostExecutor::DeviceMemoryUsage(int64_t* free, int64_t* total) const { tsl::port::MemoryInfo mem_info = tsl::port::GetMemoryInfo(); *free = (mem_info.free != INT64_MAX) ? 
mem_info.free : -1; diff --git a/third_party/xla/xla/stream_executor/host/host_executor.h b/third_party/xla/xla/stream_executor/host/host_executor.h index 8c781096d6c874..40cb0ccea89330 100644 --- a/third_party/xla/xla/stream_executor/host/host_executor.h +++ b/third_party/xla/xla/stream_executor/host/host_executor.h @@ -57,14 +57,13 @@ class HostExecutor : public StreamExecutor { absl::Status Init() override; absl::Status GetKernel(const MultiKernelLoaderSpec& spec, - Kernel* kernel) override { - return absl::UnimplementedError("Not Implemented"); - } + Kernel* kernel) override; + + absl::StatusOr> CreateKernel() override; + absl::Status Launch(Stream* stream, const ThreadDim& thread_dims, const BlockDim& block_dims, const Kernel& kernel, - const KernelArgs& args) override { - return absl::UnimplementedError("Not Implemented"); - } + const KernelArgs& args) override; DeviceMemoryBase Allocate(uint64_t size, int64_t memory_space) override; void Deallocate(DeviceMemoryBase* mem) override; diff --git a/third_party/xla/xla/stream_executor/host/host_kernel.cc b/third_party/xla/xla/stream_executor/host/host_kernel.cc index e2d37085a1c756..b208ec453b9e0e 100644 --- a/third_party/xla/xla/stream_executor/host/host_kernel.cc +++ b/third_party/xla/xla/stream_executor/host/host_kernel.cc @@ -29,8 +29,9 @@ namespace stream_executor::host { HostKernel::HostKernel(unsigned arity, SE_HOST_Kernel* kernel) : arity_(arity), kernel_(kernel) {} -absl::Status HostKernel::Launch(const ThreadDim& thread_dims, - absl::Span buffers) { +absl::Status HostKernel::Launch( + const ThreadDim& thread_dims, + absl::Span buffers) const { SE_HOST_KernelThreadDim kernel_thread_dims = {thread_dims.x, thread_dims.y, thread_dims.z}; diff --git a/third_party/xla/xla/stream_executor/host/host_kernel.h b/third_party/xla/xla/stream_executor/host/host_kernel.h index ee8f67738bf08c..b06fbe76b244f2 100644 --- a/third_party/xla/xla/stream_executor/host/host_kernel.h +++ b/third_party/xla/xla/stream_executor/host/host_kernel.h @@ -18,26 +18,35 @@ limitations under the License. #include #include +#include +#include +#include #include "absl/status/status.h" #include "absl/status/statusor.h" #include "absl/types/span.h" #include "xla/stream_executor/device_memory.h" +#include "xla/stream_executor/host/host_execution_engine.h" #include "xla/stream_executor/host/host_kernel_c_api.h" #include "xla/stream_executor/kernel.h" #include "xla/stream_executor/launch_dim.h" namespace stream_executor::host { +class HostExecutor; + class HostKernel : public Kernel { public: - HostKernel(unsigned arity, SE_HOST_Kernel* kernel); + explicit HostKernel() = default; + + // TODO(tsilytskyi): make this implementation detail private + explicit HostKernel(unsigned arity, SE_HOST_Kernel* kernel); - // TODO(b/331430625): Connect this API to Launch API defined at StreamExecutor - // level, which requires refactoring how arguments passed to kernels, as - // current KernelArgs structure tied to the GPU kernel ABI. + // TODO(b/331430625): Connect this API to Launch API defined at + // StreamExecutor level, which requires refactoring how arguments passed to + // kernels, as current KernelArgs structure tied to the GPU kernel ABI. absl::Status Launch(const ThreadDim& thread_dims, - absl::Span buffers); + absl::Span buffers) const; // For host platform, we assume that a core is a thread, and we can run at // most one instance of a kernel on a given thread. 
@@ -46,13 +55,31 @@ class HostKernel : public Kernel {
    return 1;
  };

+  void SetArity(unsigned arity) { arity_ = arity; };
  unsigned Arity() const override { return arity_; };

+  template <typename T>
+  void SetExecutionEngine(std::unique_ptr<T> execution_engine) {
+    static_assert(std::is_base_of<HostExecutionEngine, T>::value,
+                  "T is not derived from HostExecutionEngine");
+    execution_engine_ = std::move(execution_engine);
+  }
+
 private:
+  std::unique_ptr<HostExecutionEngine> execution_engine_;
+
  unsigned arity_;
  SE_HOST_Kernel* kernel_ = nullptr;
};

+inline const HostKernel* AsHostKernel(const Kernel* kernel) {
+  return static_cast<const HostKernel*>(kernel);
+}
+
+inline HostKernel* AsHostKernel(Kernel* kernel) {
+  return static_cast<HostKernel*>(kernel);
+}
+
}  // namespace stream_executor::host

#endif  // XLA_STREAM_EXECUTOR_HOST_HOST_KERNEL_H_
diff --git a/third_party/xla/xla/stream_executor/host/host_kernel_test.cc b/third_party/xla/xla/stream_executor/host/host_kernel_test.cc
index 6bf3439d2e95e9..2dcb432cf81bf4 100644
--- a/third_party/xla/xla/stream_executor/host/host_kernel_test.cc
+++ b/third_party/xla/xla/stream_executor/host/host_kernel_test.cc
@@ -16,12 +16,20 @@ limitations under the License.

#include "xla/stream_executor/host/host_kernel.h"

#include
+#include
+#include
#include

+#include "absl/types/span.h"
#include "xla/stream_executor/device_memory.h"
#include "xla/stream_executor/host/host_kernel_c_api.h"
+#include "xla/stream_executor/kernel_spec.h"
#include "xla/stream_executor/launch_dim.h"
+#include "xla/stream_executor/platform.h"
+#include "xla/stream_executor/platform_manager.h"
+#include "xla/stream_executor/stream_executor.h"
#include "tsl/lib/core/status_test_util.h"
+#include "tsl/platform/statusor.h"
#include "tsl/platform/test.h"

namespace stream_executor::host {
@@ -41,6 +49,38 @@ static SE_HOST_KernelError* AddI32(const SE_HOST_KernelCallFrame* call_frame) {
  return nullptr;
}

+static const char* llvm_kernel_add = R"(
+%SE_HOST_KernelCallFrame = type { ptr, ptr, i64, ptr }
+%struct.SE_HOST_KernelArg = type { ptr, i64 }
+
+define ptr @LlvmAddI32(ptr noundef %0) {
+  %2 = getelementptr inbounds %SE_HOST_KernelCallFrame, ptr %0, i32 0, i32 3
+  %3 = load ptr, ptr %2, align 8
+  %4 = getelementptr inbounds %struct.SE_HOST_KernelArg, ptr %3, i64 1
+  %5 = getelementptr inbounds %struct.SE_HOST_KernelArg, ptr %3, i64 2
+  %6 = load ptr, ptr %3, align 8
+  %7 = load ptr, ptr %4, align 8
+  %8 = load ptr, ptr %5, align 8
+  %9 = getelementptr inbounds %SE_HOST_KernelCallFrame, ptr %0, i32 0, i32 1
+  %10 = load ptr, ptr %9, align 8
+  %11 = load i64, ptr %10, align 8
+  %12 = getelementptr inbounds i32, ptr %6, i64 %11
+  %13 = load i32, ptr %12, align 4
+  %14 = getelementptr inbounds i32, ptr %7, i64 %11
+  %15 = load i32, ptr %14, align 4
+  %16 = add nsw i32 %13, %15
+  %17 = getelementptr inbounds i32, ptr %8, i64 %11
+  store i32 %16, ptr %17, align 4
+  ret ptr null
+}
+)";
+
+static std::unique_ptr<StreamExecutor> NewStreamExecutor() {
+  Platform* platform = PlatformManager::PlatformWithName("Host").value();
+  StreamExecutorConfig config(/*ordinal=*/0);
+  return platform->GetUncachedExecutor(config).value();
+}
+
TEST(HostKernelTest, Addition) {
  HostKernel kernel(/*arity=*/3, AddI32);
@@ -59,4 +99,31 @@
  EXPECT_EQ(out, expected);
}

+TEST(HostKernelTest, LlvmAddition) {
+  std::vector<int32_t> lhs = {1, 2, 3, 4};
+  std::vector<int32_t> rhs = {5, 6, 7, 8};
+  std::vector<int32_t> out = {0, 0, 0, 0};
+
+  DeviceMemoryBase lhs_mem(lhs.data(), lhs.size() * sizeof(int32_t));
+  DeviceMemoryBase rhs_mem(rhs.data(), rhs.size() * sizeof(int32_t));
+  DeviceMemoryBase out_mem(out.data(),
out.size() * sizeof(int32_t)); + std::vector args = {lhs_mem, rhs_mem, out_mem}; + + MultiKernelLoaderSpec spec(/*arity=*/3); + spec.AddLlvmHostKernel(llvm_kernel_add, "LlvmAddI32", "LlvmAddI32", + absl::Span()); + + auto executor = NewStreamExecutor(); + auto eg = executor.get(); + EXPECT_NE(eg, nullptr); + TF_ASSERT_OK_AND_ASSIGN(auto add, HostKernel::Create(eg, spec)); + + // TODO(tsilytskyi): implement Launch part + // TF_ASSERT_OK(executor->Launch(ThreadDim(4), args)); + + // std::vector expected = {6, 8, 10, 12}; + // EXPECT_EQ(out, expected); + // EXPECT_TRUE(true); +} + } // namespace stream_executor::host From c0cfee827b6d49e3aecd2de1837269bc2fbe9ad8 Mon Sep 17 00:00:00 2001 From: Greg Olechwierowicz Date: Mon, 13 May 2024 09:02:53 -0700 Subject: [PATCH 025/478] [XLA:GPU] Rename LoopDoubleBufferTransformer to DoubleBufferLoopUnrolling. PiperOrigin-RevId: 633228767 --- third_party/xla/xla/service/gpu/BUILD | 14 ++++---- ...mer.cc => double_buffer_loop_unrolling.cc} | 4 +-- ...ormer.h => double_buffer_loop_unrolling.h} | 12 +++---- ...c => double_buffer_loop_unrolling_test.cc} | 36 +++++++++---------- .../xla/xla/service/gpu/gpu_compiler.cc | 14 ++++---- 5 files changed, 39 insertions(+), 41 deletions(-) rename third_party/xla/xla/service/gpu/{loop_double_buffer_transformer.cc => double_buffer_loop_unrolling.cc} (99%) rename third_party/xla/xla/service/gpu/{loop_double_buffer_transformer.h => double_buffer_loop_unrolling.h} (88%) rename third_party/xla/xla/service/gpu/{loop_double_buffer_transformer_test.cc => double_buffer_loop_unrolling_test.cc} (97%) diff --git a/third_party/xla/xla/service/gpu/BUILD b/third_party/xla/xla/service/gpu/BUILD index fec7fc3ea976f5..a851ea99b9b182 100644 --- a/third_party/xla/xla/service/gpu/BUILD +++ b/third_party/xla/xla/service/gpu/BUILD @@ -3433,7 +3433,7 @@ cc_library( ":instruction_fusion", ":ir_emission_utils", ":ir_emitter", - ":loop_double_buffer_transformer", + ":double_buffer_loop_unrolling", ":matmul_utils", ":metrics", ":move_copy_to_users", @@ -5803,9 +5803,9 @@ xla_cc_test( ) cc_library( - name = "loop_double_buffer_transformer", - srcs = ["loop_double_buffer_transformer.cc"], - hdrs = ["loop_double_buffer_transformer.h"], + name = "double_buffer_loop_unrolling", + srcs = ["double_buffer_loop_unrolling.cc"], + hdrs = ["double_buffer_loop_unrolling.h"], deps = [ "//xla:status", "//xla:util", @@ -5829,10 +5829,10 @@ cc_library( ) xla_cc_test( - name = "loop_double_buffer_transformer_test", - srcs = ["loop_double_buffer_transformer_test.cc"], + name = "double_buffer_loop_unrolling_test", + srcs = ["double_buffer_loop_unrolling_test.cc"], deps = [ - ":loop_double_buffer_transformer", + ":double_buffer_loop_unrolling", "//xla:test", "//xla:xla_data_proto_cc", "//xla:xla_proto_cc", diff --git a/third_party/xla/xla/service/gpu/loop_double_buffer_transformer.cc b/third_party/xla/xla/service/gpu/double_buffer_loop_unrolling.cc similarity index 99% rename from third_party/xla/xla/service/gpu/loop_double_buffer_transformer.cc rename to third_party/xla/xla/service/gpu/double_buffer_loop_unrolling.cc index 36f4d81a452a02..4c10da5b8aa047 100644 --- a/third_party/xla/xla/service/gpu/loop_double_buffer_transformer.cc +++ b/third_party/xla/xla/service/gpu/double_buffer_loop_unrolling.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "xla/service/gpu/loop_double_buffer_transformer.h" +#include "xla/service/gpu/double_buffer_loop_unrolling.h" #include #include @@ -362,7 +362,7 @@ absl::StatusOr DoubleBufferingUnroll(HloInstruction* while_instr, } // namespace -absl::StatusOr LoopDoubleBufferTransformer::Run( +absl::StatusOr DoubleBufferLoopUnrolling::Run( HloModule* module, const absl::flat_hash_set& execution_threads) { bool changed = false; diff --git a/third_party/xla/xla/service/gpu/loop_double_buffer_transformer.h b/third_party/xla/xla/service/gpu/double_buffer_loop_unrolling.h similarity index 88% rename from third_party/xla/xla/service/gpu/loop_double_buffer_transformer.h rename to third_party/xla/xla/service/gpu/double_buffer_loop_unrolling.h index f0d891a09ab685..120070dbccd452 100644 --- a/third_party/xla/xla/service/gpu/loop_double_buffer_transformer.h +++ b/third_party/xla/xla/service/gpu/double_buffer_loop_unrolling.h @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef XLA_SERVICE_GPU_LOOP_DOUBLE_BUFFER_TRANSFORMER_H_ -#define XLA_SERVICE_GPU_LOOP_DOUBLE_BUFFER_TRANSFORMER_H_ +#ifndef XLA_SERVICE_GPU_DOUBLE_BUFFER_LOOP_UNROLLING_H_ +#define XLA_SERVICE_GPU_DOUBLE_BUFFER_LOOP_UNROLLING_H_ #include "absl/container/flat_hash_set.h" #include "absl/status/statusor.h" @@ -47,14 +47,14 @@ namespace gpu { // unrolled. // TODO(olechwierowicz): Rename the loop unroller to something more generic like // 'DoubleBufferLoopUnrolling'. -class LoopDoubleBufferTransformer : public HloModulePass { +class DoubleBufferLoopUnrolling : public HloModulePass { public: enum class UnrollStrategy { kDoubleBuffer, kFullUnroll }; - explicit LoopDoubleBufferTransformer( + explicit DoubleBufferLoopUnrolling( UnrollStrategy unroll_strategy = UnrollStrategy::kDoubleBuffer) : unroll_strategy_(unroll_strategy) {}; - ~LoopDoubleBufferTransformer() override = default; + ~DoubleBufferLoopUnrolling() override = default; absl::string_view name() const override { return "loop-double-buffer-transformer"; @@ -72,4 +72,4 @@ class LoopDoubleBufferTransformer : public HloModulePass { } // end namespace gpu } // end namespace xla -#endif // XLA_SERVICE_GPU_LOOP_DOUBLE_BUFFER_TRANSFORMER_H_ +#endif // XLA_SERVICE_GPU_DOUBLE_BUFFER_LOOP_UNROLLING_H_ diff --git a/third_party/xla/xla/service/gpu/loop_double_buffer_transformer_test.cc b/third_party/xla/xla/service/gpu/double_buffer_loop_unrolling_test.cc similarity index 97% rename from third_party/xla/xla/service/gpu/loop_double_buffer_transformer_test.cc rename to third_party/xla/xla/service/gpu/double_buffer_loop_unrolling_test.cc index 1cac4fa2c1991c..b7ff9a24de7e2e 100644 --- a/third_party/xla/xla/service/gpu/loop_double_buffer_transformer_test.cc +++ b/third_party/xla/xla/service/gpu/double_buffer_loop_unrolling_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "xla/service/gpu/loop_double_buffer_transformer.h" +#include "xla/service/gpu/double_buffer_loop_unrolling.h" #include #include @@ -102,8 +102,8 @@ ENTRY main { TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, ParseAndReturnVerifiedModule(kModuleString)); - LoopDoubleBufferTransformer double_buffer( - LoopDoubleBufferTransformer::UnrollStrategy::kFullUnroll); + DoubleBufferLoopUnrolling double_buffer( + DoubleBufferLoopUnrolling::UnrollStrategy::kFullUnroll); TupleSimplifier tuple_simp; bool changed; TF_ASSERT_OK_AND_ASSIGN(changed, double_buffer.Run(module.get())); @@ -166,8 +166,8 @@ ENTRY main { TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, ParseAndReturnVerifiedModule(kModuleString)); - LoopDoubleBufferTransformer double_buffer( - LoopDoubleBufferTransformer::UnrollStrategy::kFullUnroll); + DoubleBufferLoopUnrolling double_buffer( + DoubleBufferLoopUnrolling::UnrollStrategy::kFullUnroll); TupleSimplifier tuple_simp; bool changed; TF_ASSERT_OK_AND_ASSIGN(changed, double_buffer.Run(module.get())); @@ -235,7 +235,7 @@ ENTRY main { TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, ParseAndReturnVerifiedModule(kModuleString)); - LoopDoubleBufferTransformer double_buffer; + DoubleBufferLoopUnrolling double_buffer; TupleSimplifier tuple_simp; bool changed; TF_ASSERT_OK_AND_ASSIGN(changed, double_buffer.Run(module.get())); @@ -307,7 +307,7 @@ ENTRY main { TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, ParseAndReturnVerifiedModule(kModuleString)); - LoopDoubleBufferTransformer double_buffer; + DoubleBufferLoopUnrolling double_buffer; TupleSimplifier tuple_simp; EXPECT_THAT(double_buffer.Run(module.get()), IsOkAndHolds(true)); EXPECT_THAT(tuple_simp.Run(module.get()), IsOkAndHolds(true)); @@ -372,7 +372,7 @@ ENTRY main { TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, ParseAndReturnVerifiedModule(kModuleString)); - LoopDoubleBufferTransformer double_buffer; + DoubleBufferLoopUnrolling double_buffer; TupleSimplifier tuple_simp; EXPECT_THAT(double_buffer.Run(module.get()), IsOkAndHolds(true)); EXPECT_THAT(tuple_simp.Run(module.get()), IsOkAndHolds(true)); @@ -444,7 +444,7 @@ ENTRY main { TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, ParseAndReturnVerifiedModule(kModuleString)); - LoopDoubleBufferTransformer double_buffer; + DoubleBufferLoopUnrolling double_buffer; TupleSimplifier tuple_simp; EXPECT_THAT(double_buffer.Run(module.get()), IsOkAndHolds(true)); EXPECT_THAT(tuple_simp.Run(module.get()), IsOkAndHolds(true)); @@ -517,8 +517,8 @@ ENTRY main { TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, ParseAndReturnVerifiedModule(kModuleString)); - LoopDoubleBufferTransformer double_buffer( - LoopDoubleBufferTransformer::UnrollStrategy::kFullUnroll); + DoubleBufferLoopUnrolling double_buffer( + DoubleBufferLoopUnrolling::UnrollStrategy::kFullUnroll); TupleSimplifier tuple_simp; EXPECT_THAT(double_buffer.Run(module.get()), IsOkAndHolds(true)); EXPECT_THAT(tuple_simp.Run(module.get()), IsOkAndHolds(true)); @@ -592,7 +592,7 @@ ENTRY main { TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, ParseAndReturnVerifiedModule(kModuleString)); - LoopDoubleBufferTransformer double_buffer; + DoubleBufferLoopUnrolling double_buffer; EXPECT_THAT(double_buffer.Run(module.get()), IsOkAndHolds(true)); absl::flat_hash_set while_loops_callees; @@ -651,7 +651,7 @@ ENTRY main { TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, ParseAndReturnVerifiedModule(kModuleString)); - LoopDoubleBufferTransformer double_buffer; + 
DoubleBufferLoopUnrolling double_buffer; EXPECT_THAT(double_buffer.Run(module.get()), IsOkAndHolds(true)); absl::flat_hash_set while_loops_callees; @@ -710,8 +710,8 @@ ENTRY main { TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, ParseAndReturnVerifiedModule(kModuleString)); - LoopDoubleBufferTransformer double_buffer( - LoopDoubleBufferTransformer::UnrollStrategy::kFullUnroll); + DoubleBufferLoopUnrolling double_buffer( + DoubleBufferLoopUnrolling::UnrollStrategy::kFullUnroll); EXPECT_THAT(double_buffer.Run(module.get()), IsOkAndHolds(true)); absl::flat_hash_set while_loops_callees; @@ -776,7 +776,7 @@ ENTRY main { TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, ParseAndReturnVerifiedModule(kModuleString)); - LoopDoubleBufferTransformer double_buffer; + DoubleBufferLoopUnrolling double_buffer; EXPECT_THAT(double_buffer.Run(module.get()), IsOkAndHolds(true)); int64_t num_whiles = 0; @@ -830,8 +830,8 @@ ENTRY main { TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, ParseAndReturnVerifiedModule(kModuleString)); - LoopDoubleBufferTransformer double_buffer( - LoopDoubleBufferTransformer::UnrollStrategy::kFullUnroll); + DoubleBufferLoopUnrolling double_buffer( + DoubleBufferLoopUnrolling::UnrollStrategy::kFullUnroll); EXPECT_THAT(double_buffer.Run(module.get()), IsOkAndHolds(true)); int64_t num_whiles = 0; diff --git a/third_party/xla/xla/service/gpu/gpu_compiler.cc b/third_party/xla/xla/service/gpu/gpu_compiler.cc index 2d33d67d2d6064..02673628e26980 100644 --- a/third_party/xla/xla/service/gpu/gpu_compiler.cc +++ b/third_party/xla/xla/service/gpu/gpu_compiler.cc @@ -117,6 +117,7 @@ limitations under the License. #include "xla/service/gpu/custom_kernel_fusion_rewriter.h" #include "xla/service/gpu/dot_dimension_sorter.h" #include "xla/service/gpu/dot_operand_converter.h" +#include "xla/service/gpu/double_buffer_loop_unrolling.h" #include "xla/service/gpu/fusion_pipeline.h" #include "xla/service/gpu/fusion_wrapper.h" #include "xla/service/gpu/gemm_broadcast_folding_rewriter.h" @@ -141,7 +142,6 @@ limitations under the License. #include "xla/service/gpu/ir_emission_utils.h" #include "xla/service/gpu/ir_emitter_context.h" #include "xla/service/gpu/ir_emitter_unnested.h" -#include "xla/service/gpu/loop_double_buffer_transformer.h" #include "xla/service/gpu/matmul_utils.h" #include "xla/service/gpu/metrics.h" #include "xla/service/gpu/model/gpu_cost_model_stats_collection.h" @@ -1096,28 +1096,26 @@ absl::Status RunPostFusionPasses( pipeline.AddPass(blueconnect_num_devices_per_host); } - std::optional unroll_strategy = + std::optional unroll_strategy = std::nullopt; // Support old flag. if (opts.xla_gpu_enable_while_loop_double_buffering()) { - unroll_strategy = - LoopDoubleBufferTransformer::UnrollStrategy::kDoubleBuffer; + unroll_strategy = DoubleBufferLoopUnrolling::UnrollStrategy::kDoubleBuffer; } // Support new flag setting style, override the old one. 
if (opts.xla_gpu_enable_while_loop_unrolling() ==
      DebugOptions::WHILE_LOOP_UNROLLING_DOUBLE_BUFFER) {
-    unroll_strategy =
-        LoopDoubleBufferTransformer::UnrollStrategy::kDoubleBuffer;
+    unroll_strategy = DoubleBufferLoopUnrolling::UnrollStrategy::kDoubleBuffer;
  }
  if (opts.xla_gpu_enable_while_loop_unrolling() ==
      DebugOptions::WHILE_LOOP_UNROLLING_FULL_UNROLL) {
    LOG_IF(WARNING, unroll_strategy != std::nullopt)
        << "Overriding double buffering set via "
           "`xla_gpu_enable_while_loop_double_buffering` flag.";
-    unroll_strategy = LoopDoubleBufferTransformer::UnrollStrategy::kFullUnroll;
+    unroll_strategy = DoubleBufferLoopUnrolling::UnrollStrategy::kFullUnroll;
  }
  if (unroll_strategy != std::nullopt) {
-    pipeline.AddPass<LoopDoubleBufferTransformer>(*unroll_strategy);
+    pipeline.AddPass<DoubleBufferLoopUnrolling>(*unroll_strategy);
    pipeline.AddPass();
    pipeline.AddPass();
  }

From 96ed1fe06391abb0ed9851c70066eb3b7edd5e34 Mon Sep 17 00:00:00 2001
From: Kyle Lucke
Date: Mon, 13 May 2024 09:17:33 -0700
Subject: [PATCH 026/478] Remove unused, unimplemented methods on
 XlaInterpreterExecutor.

PiperOrigin-RevId: 633233078
---
 third_party/xla/xla/backends/interpreter/executor.h | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/third_party/xla/xla/backends/interpreter/executor.h b/third_party/xla/xla/backends/interpreter/executor.h
index 909d7fbb1bc513..9f72e99dd0a69d 100644
--- a/third_party/xla/xla/backends/interpreter/executor.h
+++ b/third_party/xla/xla/backends/interpreter/executor.h
@@ -175,11 +175,6 @@ class XlaInterpreterExecutor : public StreamExecutor {
  // The device ordinal value that this executor was initialized with; recorded
  // for use in getting device metadata. Immutable post-initialization.
  int device_ordinal_;
-
-  DeviceMemoryBase AllocateSingleOutput(const xla::Shape &shape);
-
-  absl::StatusOr AllocateOutputBuffer(
-      const xla::Shape &shape);
};

}  // namespace interpreter

From d6222cd597f6c589aaace0719323433ff9b3f061 Mon Sep 17 00:00:00 2001
From: Bart Chrzaszcz
Date: Mon, 13 May 2024 09:17:46 -0700
Subject: [PATCH 027/478] Don't fail diagnostic if the severity isn't error.

If a pass emits a warning, then the status will be set to `UnknownError`,
causing pipelines to fail. But we shouldn't fail if a pass/pattern just emits
a warning.

PiperOrigin-RevId: 633233147
---
 .../tsl/tsl/framework/mlir/status_scoped_diagnostic_handler.cc  | 1 +
 1 file changed, 1 insertion(+)

diff --git a/third_party/xla/third_party/tsl/tsl/framework/mlir/status_scoped_diagnostic_handler.cc b/third_party/xla/third_party/tsl/tsl/framework/mlir/status_scoped_diagnostic_handler.cc
index 8882ff8e66cf1d..b2e447a5e778a0 100644
--- a/third_party/xla/third_party/tsl/tsl/framework/mlir/status_scoped_diagnostic_handler.cc
+++ b/third_party/xla/third_party/tsl/tsl/framework/mlir/status_scoped_diagnostic_handler.cc
@@ -59,6 +59,7 @@ mlir::LogicalResult StatusScopedDiagnosticHandler::handleDiagnostic(
  // Emit non-errors to VLOG instead of the internal status.
  if (diag.getSeverity() != mlir::DiagnosticSeverity::Error) {
    VLOG(1) << diag_str_;
+    return mlir::success();
  }

  status_.Update(absl::UnknownError(diag_str_));

From daa1a97362889e183fb8df19f94666647073ce9e Mon Sep 17 00:00:00 2001
From: Kyle Lucke
Date: Mon, 13 May 2024 09:19:44 -0700
Subject: [PATCH 028/478] Remove StreamExecutorInterface from all the
 non-static CommandBuffer methods.

Each CommandBuffer class is uniquely tied to a specific StreamExecutorInterface
(parent_), which is tracked as member data.
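
For illustration, a minimal sketch of how call sites read after this change.
This snippet is not part of the commit itself: it assumes an existing
`executor` and `stream`, a hypothetical `uint32_t` device buffer `buf`, the
`stream_executor` namespace, and the usual TF_* status macros in scope.

  // Sketch only: record, finalize, and submit a command buffer. Barrier()
  // and the other recording methods no longer take an executor argument;
  // the command buffer uses the parent_ executor captured at construction.
  TF_ASSIGN_OR_RETURN(auto cmd_buffer, CommandBuffer::Create(executor));
  TF_RETURN_IF_ERROR(cmd_buffer->Memset(&buf, uint32_t{0}, /*num_elements=*/1));
  TF_RETURN_IF_ERROR(cmd_buffer->Barrier());  // was: Barrier(executor)
  TF_RETURN_IF_ERROR(cmd_buffer->Finalize());
  TF_RETURN_IF_ERROR(executor->Submit(stream, *cmd_buffer));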
PiperOrigin-RevId: 633233717 --- .../service/gpu/runtime/command_buffer_cmd.cc | 19 +-- .../xla/xla/stream_executor/command_buffer.h | 43 ++---- .../stream_executor/gpu/gpu_command_buffer.cc | 122 +++++++----------- .../stream_executor/gpu/gpu_command_buffer.h | 38 ++---- .../gpu/gpu_command_buffer_test.cc | 50 ++++--- 5 files changed, 106 insertions(+), 166 deletions(-) diff --git a/third_party/xla/xla/service/gpu/runtime/command_buffer_cmd.cc b/third_party/xla/xla/service/gpu/runtime/command_buffer_cmd.cc index 8a4af21e1b3b10..b415fdd14e15a5 100644 --- a/third_party/xla/xla/service/gpu/runtime/command_buffer_cmd.cc +++ b/third_party/xla/xla/service/gpu/runtime/command_buffer_cmd.cc @@ -292,7 +292,6 @@ absl::Status CommandBufferCmdSequence::Record( } } - se::StreamExecutor* device = execute_params.stream->parent(); const ModuleAnnotations* annotations = GetCurrentModuleAnnotations(); // Track the number of commands recorded between barriers. @@ -309,7 +308,7 @@ absl::Status CommandBufferCmdSequence::Record( << num_recorded_commands[execution_scope_id] << " recorded commands into the execution scope #" << execution_scope_id.value(); - TF_RETURN_IF_ERROR(command_buffer->Barrier(device, execution_scope_id)); + TF_RETURN_IF_ERROR(command_buffer->Barrier(execution_scope_id)); num_recorded_commands.erase(execution_scope_id); } VLOG(5) << " Record command buffer with scope id " @@ -849,8 +848,7 @@ absl::Status IfCmd::Record(const Thunk::ExecuteParams& execute_params, VLOG(5) << " pred: " << pred_ << " (" << pred.opaque() << ")"; return command_buffer->If( - execution_scope_id, execute_params.stream->parent(), - se::DeviceMemory(pred), + execution_scope_id, se::DeviceMemory(pred), CreateBuilder(&then_commands_, &execute_params, &record_params)); } @@ -893,8 +891,7 @@ absl::Status IfElseCmd::Record(const Thunk::ExecuteParams& execute_params, VLOG(5) << " pred: " << pred_ << " (" << pred.opaque() << ")"; return command_buffer->IfElse( - execution_scope_id, execute_params.stream->parent(), - se::DeviceMemory(pred), + execution_scope_id, se::DeviceMemory(pred), CreateBuilder(&then_commands_, &execute_params, &record_params), CreateBuilder(&else_commands_, &execute_params, &record_params)); } @@ -939,7 +936,6 @@ absl::Status CaseCmd::Record(const Thunk::ExecuteParams& execute_params, VLOG(5) << " index: " << index_ << " (" << index.opaque() << ")"; return command_buffer->Case(execution_scope_id, - execute_params.stream->parent(), se::DeviceMemory(index), CreateBuilders(absl::MakeSpan(branches_commands_), &execute_params, &record_params)); @@ -985,7 +981,7 @@ absl::Status ForCmd::Record(const Thunk::ExecuteParams& execute_params, << loop_counter.opaque() << ")"; return command_buffer->For( - execution_scope_id, execute_params.stream->parent(), num_iterations_, + execution_scope_id, num_iterations_, se::DeviceMemory(loop_counter), CreateBuilder(&body_commands_, &execute_params, &record_params)); } @@ -1030,8 +1026,7 @@ absl::Status WhileCmd::Record(const Thunk::ExecuteParams& execute_params, VLOG(5) << " pred: " << pred_ << " (" << pred.opaque() << ")"; return command_buffer->While( - execution_scope_id, execute_params.stream->parent(), - se::DeviceMemory(pred), + execution_scope_id, se::DeviceMemory(pred), CreateExecutionScopeBuilder(&cond_commands_, &execute_params, &record_params), CreateBuilder(&body_commands_, &execute_params, &record_params)); @@ -1340,7 +1335,6 @@ absl::Status BarrierCmd::Record(const Thunk::ExecuteParams& execute_params, << " to stream " << execution_stream_id().value(); if 
(from_stream_id_ != execution_stream_id()) { TF_RETURN_IF_ERROR(command_buffer->Barrier( - execute_params.stream->parent(), CommandBufferCmd::GetExecutionScope(record_params, from_stream_id_), CommandBufferCmd::GetExecutionScope(record_params, execution_stream_id()))); @@ -1367,8 +1361,7 @@ absl::Status CollectiveCmd::BarrierIfAsync( const CommandBufferCmd::RecordParams& record_params) { if (IsAsync()) { TF_RETURN_IF_ERROR( - command_buffer->Barrier(executor, - CommandBufferCmd::GetExecutionScope( + command_buffer->Barrier(CommandBufferCmd::GetExecutionScope( record_params, async_from_stream_id_), CommandBufferCmd::GetExecutionScope( record_params, execution_stream_id()))); diff --git a/third_party/xla/xla/stream_executor/command_buffer.h b/third_party/xla/xla/stream_executor/command_buffer.h index 21ee0bc79f306e..1237493a2fb55e 100644 --- a/third_party/xla/xla/stream_executor/command_buffer.h +++ b/third_party/xla/xla/stream_executor/command_buffer.h @@ -202,25 +202,20 @@ class CommandBuffer { // Adds an execution barrier to a given execution scope: all commands added // before a barrier in a the execution scope will complete before any of the // commands added after a barrier in the same execution scope. - virtual absl::Status Barrier(StreamExecutorInterface* executor, - ExecutionScopeId execution_scope_id) = 0; + virtual absl::Status Barrier(ExecutionScopeId execution_scope_id) = 0; // Adds an execution barrier that synchronizes commands across multiple // execution scopes. See example #2 in execution scope id documentation. virtual absl::Status Barrier( - StreamExecutorInterface* executor, absl::Span execution_scope_ids) = 0; // Adds an execution barrier from execution scope `from_execution_scope_id` to // execution scope `to_execution_scope_id`. See example #3 for details. - virtual absl::Status Barrier(StreamExecutorInterface* executor, - ExecutionScopeId from_execution_scope_id, + virtual absl::Status Barrier(ExecutionScopeId from_execution_scope_id, ExecutionScopeId to_execution_scope_id) = 0; // Adds an execution barrier to the default execution scope. - absl::Status Barrier(StreamExecutorInterface* executor) { - return Barrier(executor, kDefaulExecutionScope); - } + absl::Status Barrier() { return Barrier(kDefaulExecutionScope); } // Adds a kernel launch command. virtual absl::Status Launch(ExecutionScopeId execution_scope_id, @@ -292,29 +287,24 @@ class CommandBuffer { // Adds a conditional operation that will execute a command buffer constructed // by `then_builder` if `pred` value is `true`. virtual absl::Status If(ExecutionScopeId execution_scope_id, - StreamExecutorInterface* executor, DeviceMemory pred, Builder then_builder) = 0; // Adds a conditional If operation to default execution scope. - absl::Status If(StreamExecutorInterface* executor, DeviceMemory pred, - Builder then_builder) { - return If(kDefaulExecutionScope, executor, pred, then_builder); + absl::Status If(DeviceMemory pred, Builder then_builder) { + return If(kDefaulExecutionScope, pred, then_builder); } // Adds a conditional operation that will execute a command buffer constructed // by `then_builder` if `pred` value is `true`, or a command buffer // constructed by `else_builder` if `pred` is `false`. virtual absl::Status IfElse(ExecutionScopeId execution_scope_id, - StreamExecutorInterface* executor, DeviceMemory pred, Builder then_builder, Builder else_builder) = 0; // Adds a conditional IfElse operation to default execution scope. 
- absl::Status IfElse(StreamExecutorInterface* executor, - DeviceMemory pred, Builder then_builder, + absl::Status IfElse(DeviceMemory pred, Builder then_builder, Builder else_builder) { - return IfElse(kDefaulExecutionScope, executor, pred, then_builder, - else_builder); + return IfElse(kDefaulExecutionScope, pred, then_builder, else_builder); } // Adds a conditional operation that will execute a command buffer constructed @@ -323,15 +313,13 @@ class CommandBuffer { // // See: https://github.com/openxla/stablehlo/blob/main/docs/spec.md#case virtual absl::Status Case(ExecutionScopeId execution_scope_id, - StreamExecutorInterface* executor, DeviceMemory index, std::vector branches) = 0; // Adds a conditional Case operation to default execution scope. - absl::Status Case(StreamExecutorInterface* executor, - DeviceMemory index, + absl::Status Case(DeviceMemory index, std::vector branches) { - return Case(kDefaulExecutionScope, executor, index, branches); + return Case(kDefaulExecutionScope, index, branches); } // Adds a conditional operation that will execute a command buffer constructed @@ -339,15 +327,14 @@ class CommandBuffer { // condition is known at compile time (`num_iteration` < `loop_counter`), and // does not require a `cond_builder`. virtual absl::Status For(ExecutionScopeId execution_scope_id, - StreamExecutorInterface* executor, int32_t num_iteration, DeviceMemory loop_counter, Builder body_builder) = 0; // Adds a conditional For operation to default execution scope. - absl::Status For(StreamExecutorInterface* executor, int32_t num_iteration, - DeviceMemory loop_counter, Builder body_builder) { - return For(kDefaulExecutionScope, executor, num_iteration, loop_counter, + absl::Status For(int32_t num_iteration, DeviceMemory loop_counter, + Builder body_builder) { + return For(kDefaulExecutionScope, num_iteration, loop_counter, body_builder); } @@ -368,16 +355,14 @@ class CommandBuffer { // condition twice: (1) before the conditional node in the scope defined by // `execution_scope_id` (2) inside the loop body with default execution scope. virtual absl::Status While(ExecutionScopeId execution_scope_id, - StreamExecutorInterface* executor, DeviceMemory pred, ExecutionScopeBuilder cond_builder, Builder body_builder) = 0; // Adds a conditional While operation to default execution scope. 
- absl::Status While(StreamExecutorInterface* executor, DeviceMemory pred, + absl::Status While(DeviceMemory pred, ExecutionScopeBuilder cond_builder, Builder body_builder) { - return While(kDefaulExecutionScope, executor, pred, cond_builder, - body_builder); + return While(kDefaulExecutionScope, pred, cond_builder, body_builder); } //--------------------------------------------------------------------------// diff --git a/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer.cc b/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer.cc index 7d7ff5a4c6bdfb..c0788cf21a9e58 100644 --- a/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer.cc +++ b/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer.cc @@ -213,73 +213,71 @@ GpuCommandBuffer::Dependencies GpuCommandBuffer::GetBarrier( } absl::StatusOr -GpuCommandBuffer::GetSetIfConditionKernel(StreamExecutorInterface* executor) { +GpuCommandBuffer::GetSetIfConditionKernel() { if (!set_if_condition_kernel_) { MultiKernelLoaderSpec spec(/*arity=*/2); spec.AddCudaPtxInMemory(gpu::GetSetIfConditionKernel(), "set_if_condition"); TF_ASSIGN_OR_RETURN(set_if_condition_kernel_, - SetIfConditionKernel::Create(executor, spec)); + SetIfConditionKernel::Create(parent_, spec)); } return &set_if_condition_kernel_; } absl::StatusOr -GpuCommandBuffer::GetSetIfElseConditionKernel( - StreamExecutorInterface* executor) { +GpuCommandBuffer::GetSetIfElseConditionKernel() { if (!set_if_else_condition_kernel_) { MultiKernelLoaderSpec spec(/*arity=*/3); spec.AddCudaPtxInMemory(gpu::GetSetIfElseConditionKernel(), "set_if_else_condition"); TF_ASSIGN_OR_RETURN(set_if_else_condition_kernel_, - SetIfElseConditionKernel::Create(executor, spec)); + SetIfElseConditionKernel::Create(parent_, spec)); } return &set_if_else_condition_kernel_; } absl::StatusOr -GpuCommandBuffer::GetSetCaseConditionKernel(StreamExecutorInterface* executor) { +GpuCommandBuffer::GetSetCaseConditionKernel() { if (!set_case_condition_kernel_) { MultiKernelLoaderSpec spec(/*arity=*/10); spec.AddCudaPtxInMemory(gpu::GetSetCaseConditionKernel(), "set_case_condition"); TF_ASSIGN_OR_RETURN(set_case_condition_kernel_, - SetCaseConditionKernel::Create(executor, spec)); + SetCaseConditionKernel::Create(parent_, spec)); } return &set_case_condition_kernel_; } absl::StatusOr -GpuCommandBuffer::GetSetForConditionKernel(StreamExecutorInterface* executor) { +GpuCommandBuffer::GetSetForConditionKernel() { if (!set_for_condition_kernel_) { MultiKernelLoaderSpec spec(/*arity=*/3); spec.AddCudaPtxInMemory(gpu::GetSetForConditionKernel(), "set_for_condition"); TF_ASSIGN_OR_RETURN(set_for_condition_kernel_, - SetForConditionKernel::Create(executor, spec)); + SetForConditionKernel::Create(parent_, spec)); } return &set_for_condition_kernel_; } absl::StatusOr -GpuCommandBuffer::GetSetWhileConditionKernel( - StreamExecutorInterface* executor) { +GpuCommandBuffer::GetSetWhileConditionKernel() { if (!set_while_condition_kernel_) { MultiKernelLoaderSpec spec(/*arity=*/2); spec.AddCudaPtxInMemory(gpu::GetSetWhileConditionKernel(), "set_while_condition"); TF_ASSIGN_OR_RETURN(set_while_condition_kernel_, - SetWhileConditionKernel::Create(executor, spec)); + SetWhileConditionKernel::Create(parent_, spec)); } return &set_while_condition_kernel_; } -absl::StatusOr GpuCommandBuffer::GetNoOpKernel( - StreamExecutorInterface* executor) { +absl::StatusOr +GpuCommandBuffer::GetNoOpKernel() { #if !defined(TENSORFLOW_USE_ROCM) if (!noop_kernel_) { MultiKernelLoaderSpec spec(/*arity=*/0); 
spec.AddCudaPtxInMemory(gpu::kNoOpKernel, "noop"); - TF_ASSIGN_OR_RETURN(noop_kernel_, NoOpKernel::Create(executor, spec)); + TF_ASSIGN_OR_RETURN(noop_kernel_, NoOpKernel::Create(parent_, spec)); } return &noop_kernel_; #else @@ -327,13 +325,13 @@ absl::Status GpuCommandBuffer::CheckNumCommandBuffers( } absl::StatusOr GpuCommandBuffer::CreateBarrierNode( - StreamExecutorInterface* executor, const Dependencies& dependencies) { + const Dependencies& dependencies) { GpuGraphNodeHandle barrier_handle = nullptr; #if !defined(TENSORFLOW_USE_ROCM) // TODO(b/316343054): Instead of empty nodes we create no-op kernel nodes as // barriers because CUDA 12.3 does not support empty nodes inside // conditional command buffers. This should be fixed in CUDA 12.4. - TF_ASSIGN_OR_RETURN(NoOpKernel * noop, GetNoOpKernel(executor)); + TF_ASSIGN_OR_RETURN(NoOpKernel * noop, GetNoOpKernel()); TF_RETURN_IF_ERROR(GpuDriver::GraphAddKernelNode( &barrier_handle, graph_, dependencies, "noop", @@ -362,8 +360,7 @@ GpuCommandBuffer::Dependencies GpuCommandBuffer::GetBarrierDependencies( return dependencies; } -absl::Status GpuCommandBuffer::Barrier(StreamExecutorInterface* executor, - ExecutionScopeId execution_scope_id) { +absl::Status GpuCommandBuffer::Barrier(ExecutionScopeId execution_scope_id) { ExecutionScope& execution_scope = execution_scopes_[execution_scope_id]; if (state_ == State::kCreate) { @@ -391,8 +388,7 @@ absl::Status GpuCommandBuffer::Barrier(StreamExecutorInterface* executor, // If we have multiple dependencies or no existing barriers we have to // create a new empty node acting as an execution barrier. - TF_ASSIGN_OR_RETURN(auto barrier_handle, - CreateBarrierNode(executor, dependencies)); + TF_ASSIGN_OR_RETURN(auto barrier_handle, CreateBarrierNode(dependencies)); execution_scope.barriers.push_back({barrier_handle, true, nodes_offset}); return absl::OkStatus(); } @@ -414,19 +410,18 @@ absl::Status GpuCommandBuffer::Barrier(StreamExecutorInterface* executor, } absl::Status GpuCommandBuffer::Barrier( - StreamExecutorInterface* executor, absl::Span execution_scope_ids) { // Nothing to synchronize here. if (execution_scope_ids.empty()) return absl::OkStatus(); // Do not create two-level barriers for single execution scope. if (execution_scope_ids.size() == 1) { - return Barrier(executor, execution_scope_ids[0]); + return Barrier(execution_scope_ids[0]); } // Add a new barrier to every synchronized execution scope. for (ExecutionScopeId execution_scope_id : execution_scope_ids) { - TF_RETURN_IF_ERROR(Barrier(executor, execution_scope_id)); + TF_RETURN_IF_ERROR(Barrier(execution_scope_id)); } if (state_ == State::kCreate) { @@ -438,8 +433,7 @@ absl::Status GpuCommandBuffer::Barrier( } // Create a new barrier that joins all per-scope barriers together. - TF_ASSIGN_OR_RETURN(auto barrier_handle, - CreateBarrierNode(executor, dependencies)); + TF_ASSIGN_OR_RETURN(auto barrier_handle, CreateBarrierNode(dependencies)); // Broadcast new barrier to all participating execution scopes. for (ExecutionScopeId execution_scope_id : execution_scope_ids) { @@ -470,17 +464,16 @@ absl::Status GpuCommandBuffer::Barrier( return UnsupportedStateError(state_); } -absl::Status GpuCommandBuffer::Barrier(StreamExecutorInterface* executor, - ExecutionScopeId from_execution_scope_id, +absl::Status GpuCommandBuffer::Barrier(ExecutionScopeId from_execution_scope_id, ExecutionScopeId to_execution_scope_id) { // If scopes are the same simply add a barrier to it. 
if (from_execution_scope_id == to_execution_scope_id) { - return Barrier(executor, from_execution_scope_id); + return Barrier(from_execution_scope_id); } // Create new barriers in both execution scopes. - TF_RETURN_IF_ERROR(Barrier(executor, from_execution_scope_id)); - TF_RETURN_IF_ERROR(Barrier(executor, to_execution_scope_id)); + TF_RETURN_IF_ERROR(Barrier(from_execution_scope_id)); + TF_RETURN_IF_ERROR(Barrier(to_execution_scope_id)); if (state_ == State::kCreate) { // Collect barriers from each scope as dependencies. @@ -489,8 +482,7 @@ absl::Status GpuCommandBuffer::Barrier(StreamExecutorInterface* executor, execution_scopes_[to_execution_scope_id].barriers.back().handle}; // Create a new barrier that joins `from` and `to` scopes. - TF_ASSIGN_OR_RETURN(auto barrier_handle, - CreateBarrierNode(executor, dependencies)); + TF_ASSIGN_OR_RETURN(auto barrier_handle, CreateBarrierNode(dependencies)); // Add a new barrier only to the `to_execution_scope_id`. ExecutionScope& execution_scope = execution_scopes_[to_execution_scope_id]; @@ -759,9 +751,8 @@ GpuCommandBuffer::CreateConditionalNodes( } absl::Status GpuCommandBuffer::CreateConditionalCommand( - ExecutionScopeId execution_scope_id, StreamExecutorInterface* executor, - ConditionType type, SetConditionFn set_condition, - absl::Span builders) { + ExecutionScopeId execution_scope_id, ConditionType type, + SetConditionFn set_condition, absl::Span builders) { ExecutionScope& execution_scope = execution_scopes_[execution_scope_id]; TF_RETURN_IF_ERROR(CheckNotFinalized()); @@ -776,7 +767,7 @@ absl::Status GpuCommandBuffer::CreateConditionalCommand( TF_RETURN_IF_ERROR(set_condition(execution_scope_id, handles)); // Add a barrier between conditional handles and conditional nodes. - TF_RETURN_IF_ERROR(Barrier(executor, execution_scope_id)); + TF_RETURN_IF_ERROR(Barrier(execution_scope_id)); // Create conditional command buffer for each builder. TF_ASSIGN_OR_RETURN( @@ -804,7 +795,7 @@ absl::Status GpuCommandBuffer::CreateConditionalCommand( set_condition(execution_scope_id, cond_cmd_buffers.handles)); // Update a barrier between conditional handles and conditional nodes. - TF_RETURN_IF_ERROR(Barrier(executor, execution_scope_id)); + TF_RETURN_IF_ERROR(Barrier(execution_scope_id)); // Skip updating conditional nodes. 
execution_scope.update_state.node_idx += num_handles; @@ -818,13 +809,10 @@ absl::Status GpuCommandBuffer::CreateConditionalCommand( } absl::Status GpuCommandBuffer::If(ExecutionScopeId execution_scope_id, - StreamExecutorInterface* executor, DeviceMemory predicate, Builder then_builder) { - DCHECK(executor == parent_); - TF_ASSIGN_OR_RETURN(SetIfConditionKernel * set_if_condition, - GetSetIfConditionKernel(executor)); + GetSetIfConditionKernel()); auto set_cond_fn = [&](ExecutionScopeId id, ConditionalHandles handles) { return CommandBuffer::Launch(*set_if_condition, id, ThreadDim(), BlockDim(), @@ -834,19 +822,16 @@ absl::Status GpuCommandBuffer::If(ExecutionScopeId execution_scope_id, std::array builders = { ToConditionBuilder(std::move(then_builder))}; - return CreateConditionalCommand(execution_scope_id, executor, - ConditionType::kIf, set_cond_fn, builders); + return CreateConditionalCommand(execution_scope_id, ConditionType::kIf, + set_cond_fn, builders); } absl::Status GpuCommandBuffer::IfElse(ExecutionScopeId execution_scope_id, - StreamExecutorInterface* executor, DeviceMemory predicate, Builder then_builder, Builder else_builder) { - DCHECK(executor == parent_); - TF_ASSIGN_OR_RETURN(SetIfElseConditionKernel * set_if_else_condition, - GetSetIfElseConditionKernel(executor)); + GetSetIfElseConditionKernel()); auto set_cond_fn = [&](ExecutionScopeId id, ConditionalHandles handles) { return CommandBuffer::Launch(*set_if_else_condition, id, ThreadDim(), @@ -857,16 +842,13 @@ absl::Status GpuCommandBuffer::IfElse(ExecutionScopeId execution_scope_id, ToConditionBuilder(std::move(then_builder)), ToConditionBuilder(std::move(else_builder))}; - return CreateConditionalCommand(execution_scope_id, executor, - ConditionType::kIf, set_cond_fn, builders); + return CreateConditionalCommand(execution_scope_id, ConditionType::kIf, + set_cond_fn, builders); } absl::Status GpuCommandBuffer::Case(ExecutionScopeId execution_scope_id, - StreamExecutorInterface* executor, DeviceMemory index, std::vector branches) { - DCHECK(executor == parent_); - // TODO(ezhulenev): Relax this constraint, we can launch multiple back to back // kernels to update conditional handles in batches of size 8. if (branches.size() > 8) { @@ -875,7 +857,7 @@ absl::Status GpuCommandBuffer::Case(ExecutionScopeId execution_scope_id, } TF_ASSIGN_OR_RETURN(SetCaseConditionKernel * set_case_condition, - GetSetCaseConditionKernel(executor)); + GetSetCaseConditionKernel()); auto set_cond_fn = [&](ExecutionScopeId id, ConditionalHandles handles) { int32_t num_handles = handles.size(); @@ -899,23 +881,20 @@ absl::Status GpuCommandBuffer::Case(ExecutionScopeId execution_scope_id, builders.push_back(ToConditionBuilder(std::move(branch))); } - return CreateConditionalCommand(execution_scope_id, executor, - ConditionType::kIf, set_cond_fn, builders); + return CreateConditionalCommand(execution_scope_id, ConditionType::kIf, + set_cond_fn, builders); } absl::Status GpuCommandBuffer::For(ExecutionScopeId execution_scope_id, - StreamExecutorInterface* executor, int32_t num_iteration, DeviceMemory loop_counter, Builder body_builder) { - DCHECK(executor == parent_); - TF_ASSIGN_OR_RETURN(SetForConditionKernel * set_for_condition, - GetSetForConditionKernel(executor)); + GetSetForConditionKernel()); // Reset loop counter to zero. 
TF_RETURN_IF_ERROR(Memset(execution_scope_id, &loop_counter, uint32_t{0}, 1)); - TF_RETURN_IF_ERROR(Barrier(executor, execution_scope_id)); + TF_RETURN_IF_ERROR(Barrier(execution_scope_id)); auto set_cond_fn = [&](ExecutionScopeId id, ConditionalHandles handles) { return CommandBuffer::Launch(*set_for_condition, id, ThreadDim(), @@ -925,7 +904,7 @@ absl::Status GpuCommandBuffer::For(ExecutionScopeId execution_scope_id, auto body = [&](CommandBuffer* body, GpuGraphConditionalHandle handle) { TF_RETURN_IF_ERROR(body_builder(body)); - TF_RETURN_IF_ERROR(body->Barrier(executor)); + TF_RETURN_IF_ERROR(body->Barrier()); // Decide if we want to continue loop iteration. return body->Launch(*set_for_condition, ThreadDim(), BlockDim(), handle, @@ -934,23 +913,20 @@ absl::Status GpuCommandBuffer::For(ExecutionScopeId execution_scope_id, std::array builders = {std::move(body)}; - return CreateConditionalCommand(execution_scope_id, executor, - ConditionType::kWhile, set_cond_fn, builders); + return CreateConditionalCommand(execution_scope_id, ConditionType::kWhile, + set_cond_fn, builders); } absl::Status GpuCommandBuffer::While(ExecutionScopeId execution_scope_id, - StreamExecutorInterface* executor, DeviceMemory pred, ExecutionScopeBuilder cond_builder, Builder body_builder) { - DCHECK(executor == parent_); - TF_ASSIGN_OR_RETURN(SetWhileConditionKernel * set_while_condition, - GetSetWhileConditionKernel(executor)); + GetSetWhileConditionKernel()); // Record condition commands into the parent command buffer. TF_RETURN_IF_ERROR(cond_builder(execution_scope_id, this)); - TF_RETURN_IF_ERROR(Barrier(executor, execution_scope_id)); + TF_RETURN_IF_ERROR(Barrier(execution_scope_id)); auto set_cond_fn = [&](ExecutionScopeId id, ConditionalHandles handles) { return CommandBuffer::Launch(*set_while_condition, id, ThreadDim(), @@ -959,17 +935,17 @@ absl::Status GpuCommandBuffer::While(ExecutionScopeId execution_scope_id, auto body = [&](CommandBuffer* body, GpuGraphConditionalHandle handle) { TF_RETURN_IF_ERROR(body_builder(body)); - TF_RETURN_IF_ERROR(body->Barrier(executor)); + TF_RETURN_IF_ERROR(body->Barrier()); TF_RETURN_IF_ERROR(cond_builder(kDefaulExecutionScope, body)); - TF_RETURN_IF_ERROR(body->Barrier(executor)); + TF_RETURN_IF_ERROR(body->Barrier()); return body->Launch(*set_while_condition, ThreadDim(), BlockDim(), handle, pred); }; std::array builders = {std::move(body)}; - return CreateConditionalCommand(execution_scope_id, executor, - ConditionType::kWhile, set_cond_fn, builders); + return CreateConditionalCommand(execution_scope_id, ConditionType::kWhile, + set_cond_fn, builders); } absl::Status GpuCommandBuffer::Finalize() { diff --git a/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer.h b/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer.h index 2ee78b56a6e2f1..aaa0666a7f89bd 100644 --- a/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer.h +++ b/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer.h @@ -77,15 +77,12 @@ class GpuCommandBuffer : public CommandBuffer { bool is_owned_graph = true); ~GpuCommandBuffer() override; - absl::Status Barrier(StreamExecutorInterface* executor, - ExecutionScopeId execution_scope_id) override; + absl::Status Barrier(ExecutionScopeId execution_scope_id) override; absl::Status Barrier( - StreamExecutorInterface* executor, absl::Span execution_scope_ids) override; - absl::Status Barrier(StreamExecutorInterface* executor, - ExecutionScopeId from_execution_scope_id, + absl::Status Barrier(ExecutionScopeId from_execution_scope_id, 
ExecutionScopeId to_execution_scope_id) override; absl::Status Launch(ExecutionScopeId execution_scope_id, @@ -105,26 +102,22 @@ class GpuCommandBuffer : public CommandBuffer { size_t num_elements) override; absl::Status If(ExecutionScopeId execution_scope_id, - StreamExecutorInterface* executor, DeviceMemory predicate, Builder then_builder) override; absl::Status IfElse(ExecutionScopeId execution_scope_id, - StreamExecutorInterface* executor, DeviceMemory predicate, Builder then_builder, Builder else_builder) override; absl::Status Case(ExecutionScopeId execution_scope_id, - StreamExecutorInterface* executor, DeviceMemory index, std::vector branches) override; - absl::Status For(ExecutionScopeId execution_scope_id, - StreamExecutorInterface* executor, int32_t num_iteration, + absl::Status For(ExecutionScopeId execution_scope_id, int32_t num_iteration, DeviceMemory loop_counter, Builder body_builder) override; absl::Status While(ExecutionScopeId execution_scope_id, - StreamExecutorInterface* executor, DeviceMemory pred, + DeviceMemory pred, ExecutionScopeBuilder cond_builder, Builder body_builder) override; @@ -250,25 +243,20 @@ class GpuCommandBuffer : public CommandBuffer { absl::Span handles); absl::Status CreateConditionalCommand( - ExecutionScopeId execution_scope_id, StreamExecutorInterface* executor, - ConditionType type, SetConditionFn set_condition, + ExecutionScopeId execution_scope_id, ConditionType type, + SetConditionFn set_condition, absl::Span builders); Dependencies GetBarrier(ExecutionScopeId execution_scope_id); // Returns loaded auxiliary kernels, or loads them on a given stream executor. // Loaded kernels owned by a current command buffer. - absl::StatusOr GetSetIfConditionKernel( - StreamExecutorInterface* executor); - absl::StatusOr GetSetIfElseConditionKernel( - StreamExecutorInterface* executor); - absl::StatusOr GetSetCaseConditionKernel( - StreamExecutorInterface* executor); - absl::StatusOr GetSetForConditionKernel( - StreamExecutorInterface* executor); - absl::StatusOr GetSetWhileConditionKernel( - StreamExecutorInterface* executor); - absl::StatusOr GetNoOpKernel(StreamExecutorInterface* executor); + absl::StatusOr GetSetIfConditionKernel(); + absl::StatusOr GetSetIfElseConditionKernel(); + absl::StatusOr GetSetCaseConditionKernel(); + absl::StatusOr GetSetForConditionKernel(); + absl::StatusOr GetSetWhileConditionKernel(); + absl::StatusOr GetNoOpKernel(); // Recursively disable all nodes corresponding to barriers (including nested // conditional command buffers). This is work around the fact that we can't @@ -293,7 +281,7 @@ class GpuCommandBuffer : public CommandBuffer { // Creates a new no-op node acting as a barrier. absl::StatusOr CreateBarrierNode( - StreamExecutorInterface* executor, const Dependencies& dependencies); + const Dependencies& dependencies); // Collects a set of dependencies for a new barrier. Dependencies GetBarrierDependencies(ExecutionScopeId execution_scope_id); diff --git a/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer_test.cc b/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer_test.cc index 7ff757aab9aa5d..4440393cc3bb8d 100644 --- a/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer_test.cc +++ b/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer_test.cc @@ -382,22 +382,22 @@ TEST(GpuCommandBufferTest, Barriers) { auto record = [&](CommandBuffer* cmd_buffer, uint32_t bit_pattern) { // Check that root barrier ignored. 
- TF_RETURN_IF_ERROR(cmd_buffer->Barrier(executor)); + TF_RETURN_IF_ERROR(cmd_buffer->Barrier()); TF_RETURN_IF_ERROR(cmd_buffer->Memset(&buffers[0], bit_pattern + 0, 1)); // Check barrier after a single command. - TF_RETURN_IF_ERROR(cmd_buffer->Barrier(executor)); + TF_RETURN_IF_ERROR(cmd_buffer->Barrier()); TF_RETURN_IF_ERROR(cmd_buffer->Memset(&buffers[1], bit_pattern + 1, 1)); // Check that repeated barriers are no-op. - TF_RETURN_IF_ERROR(cmd_buffer->Barrier(executor)); - TF_RETURN_IF_ERROR(cmd_buffer->Barrier(executor)); + TF_RETURN_IF_ERROR(cmd_buffer->Barrier()); + TF_RETURN_IF_ERROR(cmd_buffer->Barrier()); TF_RETURN_IF_ERROR(cmd_buffer->Memset(&buffers[2], bit_pattern + 2, 1)); TF_RETURN_IF_ERROR(cmd_buffer->Memset(&buffers[3], bit_pattern + 3, 1)); // Check that barrier can have multiple dependencies. - TF_RETURN_IF_ERROR(cmd_buffer->Barrier(executor)); + TF_RETURN_IF_ERROR(cmd_buffer->Barrier()); TF_RETURN_IF_ERROR(cmd_buffer->Memset(&buffers[4], bit_pattern + 4, 1)); TF_RETURN_IF_ERROR(cmd_buffer->Memset(&buffers[5], bit_pattern + 5, 1)); // Check that barrier can be that last command. - TF_RETURN_IF_ERROR(cmd_buffer->Barrier(executor)); + TF_RETURN_IF_ERROR(cmd_buffer->Barrier()); return cmd_buffer->Finalize(); }; @@ -476,8 +476,8 @@ TEST(GpuCommandBufferTest, IndependentExecutionScopes) { TF_RETURN_IF_ERROR(cmd_buffer->Memset(s0, &buffers[1], bit_pattern + 1, 1)); TF_RETURN_IF_ERROR(cmd_buffer->Memset(s1, &buffers[2], bit_pattern + 2, 1)); TF_RETURN_IF_ERROR(cmd_buffer->Memset(s1, &buffers[3], bit_pattern + 3, 1)); - TF_RETURN_IF_ERROR(cmd_buffer->Barrier(executor, s0)); - TF_RETURN_IF_ERROR(cmd_buffer->Barrier(executor, s1)); + TF_RETURN_IF_ERROR(cmd_buffer->Barrier(s0)); + TF_RETURN_IF_ERROR(cmd_buffer->Barrier(s1)); return cmd_buffer->Finalize(); }; @@ -548,7 +548,7 @@ TEST(GpuCommandBufferTest, ExecutionScopeBarriers) { TF_RETURN_IF_ERROR(cmd_buffer->Memset(s1, &buffers[2], bit_pattern + 2, 1)); TF_RETURN_IF_ERROR(cmd_buffer->Memset(s1, &buffers[3], bit_pattern + 3, 1)); // This will synchronize scopes 0 and 1 and also create an empty scope 2. - TF_RETURN_IF_ERROR(cmd_buffer->Barrier(executor, {s0, s1, s2})); + TF_RETURN_IF_ERROR(cmd_buffer->Barrier({s0, s1, s2})); TF_RETURN_IF_ERROR(cmd_buffer->Memset(s0, &buffers[4], bit_pattern + 4, 1)); TF_RETURN_IF_ERROR(cmd_buffer->Memset(s1, &buffers[5], bit_pattern + 5, 1)); TF_RETURN_IF_ERROR(cmd_buffer->Memset(s2, &buffers[6], bit_pattern + 6, 1)); @@ -639,7 +639,7 @@ TEST(GpuCommandBufferTest, ExecutionScopeOneDirectionalBarriers) { TF_RETURN_IF_ERROR(cmd_buffer->Memset(s1, &buffers[2], bit_pattern + 2, 1)); TF_RETURN_IF_ERROR(cmd_buffer->Memset(s1, &buffers[3], bit_pattern + 3, 1)); // This will synchronize scopes 0 and 1. - TF_RETURN_IF_ERROR(cmd_buffer->Barrier(executor, s0, s1)); + TF_RETURN_IF_ERROR(cmd_buffer->Barrier(s0, s1)); TF_RETURN_IF_ERROR(cmd_buffer->Memset(s0, &buffers[4], bit_pattern + 4, 1)); TF_RETURN_IF_ERROR(cmd_buffer->Memset(s1, &buffers[5], bit_pattern + 5, 1)); return cmd_buffer->Finalize(); @@ -721,7 +721,7 @@ TEST(GpuCommandBufferTest, ConditionalIf) { // Create a command buffer with a single conditional operation. 
auto cmd_buffer = CommandBuffer::Create(executor).value(); - TF_ASSERT_OK(cmd_buffer->If(executor, pred, then_builder)); + TF_ASSERT_OK(cmd_buffer->If(pred, then_builder)); TF_ASSERT_OK(cmd_buffer->Finalize()); TF_ASSERT_OK(executor->Submit(stream.get(), *cmd_buffer)); @@ -760,7 +760,7 @@ TEST(GpuCommandBufferTest, ConditionalIf) { // Update command buffer with a conditional to use new builder. TF_ASSERT_OK(cmd_buffer->Update()); - TF_ASSERT_OK(cmd_buffer->If(executor, pred, then_builder)); + TF_ASSERT_OK(cmd_buffer->If(pred, then_builder)); TF_ASSERT_OK(cmd_buffer->Finalize()); TF_ASSERT_OK(executor->Submit(stream.get(), *cmd_buffer)); @@ -818,7 +818,7 @@ TEST(GpuCommandBufferTest, ConditionalIfElse) { // Create a command buffer with a single conditional operation. auto cmd_buffer = CommandBuffer::Create(executor).value(); - TF_ASSERT_OK(cmd_buffer->IfElse(executor, pred, then_builder, else_builder)); + TF_ASSERT_OK(cmd_buffer->IfElse(pred, then_builder, else_builder)); TF_ASSERT_OK(cmd_buffer->Finalize()); TF_ASSERT_OK(executor->Submit(stream.get(), *cmd_buffer)); @@ -855,7 +855,7 @@ TEST(GpuCommandBufferTest, ConditionalIfElse) { // Update command buffer with a conditional to use new `else` builder. TF_ASSERT_OK(cmd_buffer->Update()); - TF_ASSERT_OK(cmd_buffer->IfElse(executor, pred, then_builder, else_builder)); + TF_ASSERT_OK(cmd_buffer->IfElse(pred, then_builder, else_builder)); TF_ASSERT_OK(cmd_buffer->Finalize()); TF_ASSERT_OK(executor->Submit(stream.get(), *cmd_buffer)); @@ -913,7 +913,7 @@ TEST(GpuCommandBufferTest, ConditionalCase) { // Create a command buffer with a single conditional operation. auto cmd_buffer = CommandBuffer::Create(executor).value(); - TF_ASSERT_OK(cmd_buffer->Case(executor, index, {branch0, branch1})); + TF_ASSERT_OK(cmd_buffer->Case(index, {branch0, branch1})); TF_ASSERT_OK(cmd_buffer->Finalize()); TF_ASSERT_OK(executor->Submit(stream.get(), *cmd_buffer)); @@ -992,8 +992,7 @@ TEST(GpuCommandBufferTest, ConditionalFor) { // Create a command buffer with a single conditional operation. auto cmd_buffer = CommandBuffer::Create(executor).value(); - TF_ASSERT_OK( - cmd_buffer->For(executor, num_iters, loop_counter, body_builder)); + TF_ASSERT_OK(cmd_buffer->For(num_iters, loop_counter, body_builder)); TF_ASSERT_OK(cmd_buffer->Finalize()); TF_ASSERT_OK(executor->Submit(stream.get(), *cmd_buffer)); @@ -1060,7 +1059,7 @@ TEST(GpuCommandBufferTest, ConditionalWhile) { // Create a command buffer with a single conditional operation. auto cmd_buffer = CommandBuffer::Create(executor).value(); - TF_ASSERT_OK(cmd_buffer->While(executor, pred, cond_builder, body_builder)); + TF_ASSERT_OK(cmd_buffer->While(pred, cond_builder, body_builder)); TF_ASSERT_OK(cmd_buffer->Finalize()); TF_ASSERT_OK(executor->Submit(stream.get(), *cmd_buffer)); @@ -1112,16 +1111,15 @@ TEST(GpuCommandBufferTest, ConditionalIfInExecutionScope) { TF_RETURN_IF_ERROR(cmd_buffer->Memset(s0, &buffers[1], bit_pattern + 1, 1)); // Record If in execution scope #1 - TF_RETURN_IF_ERROR( - cmd_buffer->If(s1, executor, pred, [&](CommandBuffer* then_cmd) { - return then_cmd->Memset(&buffers[2], bit_pattern + 2, 1); - })); + TF_RETURN_IF_ERROR(cmd_buffer->If(s1, pred, [&](CommandBuffer* then_cmd) { + return then_cmd->Memset(&buffers[2], bit_pattern + 2, 1); + })); // Create a barrier in execution scope #0. - TF_RETURN_IF_ERROR(cmd_buffer->Barrier(executor, s0)); + TF_RETURN_IF_ERROR(cmd_buffer->Barrier(s0)); // Create a barrier between two execution scopes. 
- TF_RETURN_IF_ERROR(cmd_buffer->Barrier(executor, {s0, s1})); + TF_RETURN_IF_ERROR(cmd_buffer->Barrier({s0, s1})); return cmd_buffer->Finalize(); }; @@ -1210,7 +1208,7 @@ TEST(GpuCommandBufferTest, ConditionalWhileInExecutionScope) { // Record While in execution scope #1 TF_RETURN_IF_ERROR(cmd_buffer->While( - s1, executor, pred, + s1, pred, // Loop cond: loop_counter++ < num_iters; [&](ExecutionScopeId id, CommandBuffer* cond_cmd) { return cond_cmd->Launch(inc_and_cmp, id, ThreadDim(), BlockDim(), @@ -1222,7 +1220,7 @@ TEST(GpuCommandBufferTest, ConditionalWhileInExecutionScope) { })); // Create a barrier between two execution scopes. - TF_RETURN_IF_ERROR(cmd_buffer->Barrier(executor, {s0, s1})); + TF_RETURN_IF_ERROR(cmd_buffer->Barrier({s0, s1})); return cmd_buffer->Finalize(); }; From ad6690b18aa7f9ced5b4645e696d87df5e34b211 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Mon, 13 May 2024 09:23:16 -0700 Subject: [PATCH 029/478] [xla:ffi] Unify external and internal user data registration and use i64 type ids - treat external and internal user data uniformly as an opaque pointer + optional deleter - use the same i64 TypeId for internal and external types to avoid expensive string comparison on a hot path PiperOrigin-RevId: 633234788 --- third_party/xla/xla/ffi/BUILD | 5 + third_party/xla/xla/ffi/api/api.h | 6 +- third_party/xla/xla/ffi/api/c_api.h | 45 ++++-- third_party/xla/xla/ffi/api/ffi.h | 41 ++++- third_party/xla/xla/ffi/api/ffi_test.cc | 12 +- third_party/xla/xla/ffi/call_frame.cc | 6 +- third_party/xla/xla/ffi/execution_context.cc | 76 +++++---- third_party/xla/xla/ffi/execution_context.h | 146 ++++++++++-------- .../xla/xla/ffi/execution_context_test.cc | 52 ++++--- third_party/xla/xla/ffi/ffi.h | 2 +- third_party/xla/xla/ffi/ffi_api.cc | 41 +++-- third_party/xla/xla/ffi/ffi_test.cc | 10 +- .../xla/xla/service/gpu/custom_call_test.cc | 4 +- 13 files changed, 276 insertions(+), 170 deletions(-) diff --git a/third_party/xla/xla/ffi/BUILD b/third_party/xla/xla/ffi/BUILD index dbc3785e8d5e56..c53835bf83a88d 100644 --- a/third_party/xla/xla/ffi/BUILD +++ b/third_party/xla/xla/ffi/BUILD @@ -35,10 +35,15 @@ cc_library( srcs = ["execution_context.cc"], hdrs = ["execution_context.h"], deps = [ + "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", + "@com_google_absl//absl/synchronization", + "@local_tsl//tsl/lib/gtl:int_type", + "@local_tsl//tsl/platform:logging", + "@local_tsl//tsl/platform:statusor", ], ) diff --git a/third_party/xla/xla/ffi/api/api.h b/third_party/xla/xla/ffi/api/api.h index 75b678d9490c10..655d0ba1b656d5 100644 --- a/third_party/xla/xla/ffi/api/api.h +++ b/third_party/xla/xla/ffi/api/api.h @@ -150,10 +150,8 @@ XLA_FFI_Error* Ffi::RegisterStaticHandler(const XLA_FFI_Api* api, XLA_FFI_Handler_Register_Args args; args.struct_size = XLA_FFI_Handler_Register_Args_STRUCT_SIZE; args.priv = nullptr; - args.name = XLA_FFI_ByteSpan{XLA_FFI_ByteSpan_STRUCT_SIZE, nullptr, - name.data(), name.size()}; - args.platform = XLA_FFI_ByteSpan{XLA_FFI_ByteSpan_STRUCT_SIZE, nullptr, - platform.data(), platform.size()}; + args.name = XLA_FFI_ByteSpan{name.data(), name.size()}; + args.platform = XLA_FFI_ByteSpan{platform.data(), platform.size()}; args.handler = handler; args.traits = traits; return api->XLA_FFI_Handler_Register(&args); diff --git a/third_party/xla/xla/ffi/api/c_api.h b/third_party/xla/xla/ffi/api/c_api.h index 
39d4d8feef825e..243e5d3e2bea51 100644 --- a/third_party/xla/xla/ffi/api/c_api.h +++ b/third_party/xla/xla/ffi/api/c_api.h @@ -217,44 +217,38 @@ typedef enum { typedef struct XLA_FFI_ExecutionContext XLA_FFI_ExecutionContext; //===----------------------------------------------------------------------===// -// Call frame +// Primitives. //===----------------------------------------------------------------------===// +// TypeId uniquely identifies a user-defined type in a given XLA FFI instance. +struct XLA_FFI_TypeId { + int64_t type_id; +}; + // We use byte spans to pass strings to handlers because strings might not be // null terminated, and even if they are, looking for a null terminator can // become very expensive in tight loops. struct XLA_FFI_ByteSpan { - size_t struct_size; - void* priv; - const char* ptr; size_t len; }; -XLA_FFI_DEFINE_STRUCT_TRAITS(XLA_FFI_ByteSpan, len); - // A struct to pass a scalar value to FFI handler. struct XLA_FFI_Scalar { - size_t struct_size; - void* priv; - XLA_FFI_DataType dtype; void* value; }; -XLA_FFI_DEFINE_STRUCT_TRAITS(XLA_FFI_Scalar, value); - // A struct to pass a dense array to FFI handler. struct XLA_FFI_Array { - size_t struct_size; - void* priv; - XLA_FFI_DataType dtype; size_t size; void* data; }; -XLA_FFI_DEFINE_STRUCT_TRAITS(XLA_FFI_Array, data); +//===----------------------------------------------------------------------===// +// Call frame +//===----------------------------------------------------------------------===// struct XLA_FFI_Args { size_t struct_size; @@ -336,6 +330,24 @@ XLA_FFI_DEFINE_STRUCT_TRAITS(XLA_FFI_Handler_Register_Args, traits); typedef XLA_FFI_Error* XLA_FFI_Handler_Register( XLA_FFI_Handler_Register_Args* args); +//===----------------------------------------------------------------------===// +// TypeId +//===----------------------------------------------------------------------===// + +struct XLA_FFI_TypeId_Register_Args { + size_t struct_size; + void* priv; + + XLA_FFI_ByteSpan name; + XLA_FFI_TypeId* type_id; // out +}; + +XLA_FFI_DEFINE_STRUCT_TRAITS(XLA_FFI_TypeId_Register_Args, type_id); + +// Registers user type `name` and returns a unique `type_id`. +typedef XLA_FFI_Error* XLA_FFI_TypeId_Register( + XLA_FFI_TypeId_Register_Args* args); + //===----------------------------------------------------------------------===// // ExecutionContext //===----------------------------------------------------------------------===// @@ -345,7 +357,7 @@ struct XLA_FFI_ExecutionContext_Get_Args { void* priv; XLA_FFI_ExecutionContext* ctx; - XLA_FFI_ByteSpan id; + XLA_FFI_TypeId* type_id; void* data; // out }; @@ -390,6 +402,7 @@ struct XLA_FFI_Api { _XLA_FFI_API_STRUCT_FIELD(XLA_FFI_Error_Destroy); _XLA_FFI_API_STRUCT_FIELD(XLA_FFI_Handler_Register); _XLA_FFI_API_STRUCT_FIELD(XLA_FFI_Stream_Get); + _XLA_FFI_API_STRUCT_FIELD(XLA_FFI_TypeId_Register); _XLA_FFI_API_STRUCT_FIELD(XLA_FFI_ExecutionContext_Get); }; diff --git a/third_party/xla/xla/ffi/api/ffi.h b/third_party/xla/xla/ffi/api/ffi.h index addb7af543c8cf..3d56c00b94bda2 100644 --- a/third_party/xla/xla/ffi/api/ffi.h +++ b/third_party/xla/xla/ffi/api/ffi.h @@ -16,12 +16,12 @@ limitations under the License. #ifndef XLA_FFI_API_FFI_H_ #define XLA_FFI_API_FFI_H_ -#include #ifdef XLA_FFI_FFI_H_ #error Two different XLA FFI implementations cannot be included together #endif // XLA_FFI_FFI_H_ #include +#include #include #include #include @@ -29,6 +29,7 @@ limitations under the License. 
#include #include #include +#include #include #include #include @@ -567,27 +568,51 @@ struct CtxDecoding> { // UserData //===----------------------------------------------------------------------===// +// All user data types that are passed via the execution context must be +// registered with the XLA FFI ahead of time to get unique type id. +using TypeId = XLA_FFI_TypeId; // NOLINT + +inline XLA_FFI_Error* RegisterType(const XLA_FFI_Api* api, + std::string_view name, + XLA_FFI_TypeId* type_id) { + XLA_FFI_TypeId_Register_Args args; + args.struct_size = XLA_FFI_TypeId_Register_Args_STRUCT_SIZE; + args.priv = nullptr; + args.name = XLA_FFI_ByteSpan{name.data(), name.size()}; + args.type_id = type_id; + return api->XLA_FFI_TypeId_Register(&args); +} + +#define XLA_FFI_REGISTER_TYPE(API, NAME, TYPE_ID) \ + XLA_FFI_REGISTER_TYPE_(API, NAME, TYPE_ID, __COUNTER__) +#define XLA_FFI_REGISTER_TYPE_(API, NAME, TYPE_ID, N) \ + XLA_FFI_ATTRIBUTE_UNUSED static const XLA_FFI_Error* \ + xla_ffi_type_##N##_registered_ = \ + [] { return ::xla::ffi::RegisterType(API, NAME, TYPE_ID); }() + // A type tag for automatic decoding user data passed via the execution context. -template +template struct UserData {}; -template -struct CtxDecoding> { +template +struct CtxDecoding> { using Type = T*; + static_assert(std::is_same_v, + "UserData type must have a static `TypeId id` field"); + static std::optional Decode(const XLA_FFI_Api* api, XLA_FFI_ExecutionContext* ctx, DiagnosticEngine& diagnostic) { - static constexpr std::string_view id_view = {id}; - XLA_FFI_ExecutionContext_Get_Args args; args.struct_size = XLA_FFI_ExecutionContext_Get_Args_STRUCT_SIZE; args.priv = nullptr; args.ctx = ctx; - args.id = XLA_FFI_ByteSpan{XLA_FFI_ByteSpan_STRUCT_SIZE, nullptr, - id_view.data(), id_view.size()}; + args.type_id = &T::id; args.data = nullptr; + assert(args.type_id->type_id > 0 && "type must be registered with XLA FFI"); + if (XLA_FFI_Error* err = api->XLA_FFI_ExecutionContext_Get(&args); err) { diagnostic.Emit("Failed to get user data from execution context: ") << internal::ErrorUtil::GetErrorMessage(api, err); diff --git a/third_party/xla/xla/ffi/api/ffi_test.cc b/third_party/xla/xla/ffi/api/ffi_test.cc index c47e16b22e7de3..b963b99c5689fd 100644 --- a/third_party/xla/xla/ffi/api/ffi_test.cc +++ b/third_party/xla/xla/ffi/api/ffi_test.cc @@ -352,17 +352,19 @@ TEST(FfiTest, PointerAttr) { } struct MyData { + static TypeId id; std::string str; }; -TEST(FfiTest, UserData) { - static constexpr char kId[] = "my_data"; +TypeId MyData::id = {}; // zero-initialize type id +XLA_FFI_REGISTER_TYPE(GetXlaFfiApi(), "my_data", &MyData::id); +TEST(FfiTest, UserData) { MyData data{"foo"}; - auto deleter = +[](void*) {}; ExecutionContext execution_context; - TF_ASSERT_OK(execution_context.Emplace(kId, &data, deleter)); + TF_ASSERT_OK(execution_context.Insert( + ExecutionContext::TypeId(MyData::id.type_id), &data)); CallFrameBuilder builder; auto call_frame = builder.Build(); @@ -372,7 +374,7 @@ TEST(FfiTest, UserData) { return Error::Success(); }; - auto handler = Ffi::Bind().Ctx>().To(fn); + auto handler = Ffi::Bind().Ctx>().To(fn); ServiceExecutableRunOptions service_run_options; service_run_options.mutable_run_options()->set_ffi_execution_context( diff --git a/third_party/xla/xla/ffi/call_frame.cc b/third_party/xla/xla/ffi/call_frame.cc index 867c7ccb35edf2..ee17f61c7e29fb 100644 --- a/third_party/xla/xla/ffi/call_frame.cc +++ b/third_party/xla/xla/ffi/call_frame.cc @@ -135,19 +135,19 @@ struct CallFrame::Dictionary { struct 
CallFrame::Array { CallFrameBuilder::Array value; // XLA_FFI_Array::data - XLA_FFI_Array array = {XLA_FFI_Array_STRUCT_SIZE, nullptr}; + XLA_FFI_Array array = {}; }; struct CallFrame::Scalar { CallFrameBuilder::Scalar value; // XLA_FFI_Scalar::value - XLA_FFI_Scalar scalar = {XLA_FFI_Scalar_STRUCT_SIZE, nullptr}; + XLA_FFI_Scalar scalar = {}; }; struct CallFrame::String { std::string value; // XLA_FFI_ByteSpan::ptr - XLA_FFI_ByteSpan span = {XLA_FFI_ByteSpan_STRUCT_SIZE, nullptr}; + XLA_FFI_ByteSpan span = {}; }; struct CallFrame::NamedAttribute { diff --git a/third_party/xla/xla/ffi/execution_context.cc b/third_party/xla/xla/ffi/execution_context.cc index 53aaeb7943a41c..2ea5f8bf37f736 100644 --- a/third_party/xla/xla/ffi/execution_context.cc +++ b/third_party/xla/xla/ffi/execution_context.cc @@ -15,73 +15,87 @@ limitations under the License. #include "xla/ffi/execution_context.h" +#include #include #include #include #include #include +#include "absl/base/attributes.h" +#include "absl/base/const_init.h" +#include "absl/container/flat_hash_map.h" #include "absl/status/status.h" #include "absl/status/statusor.h" #include "absl/strings/str_cat.h" +#include "absl/synchronization/mutex.h" namespace xla::ffi { -ExecutionContext::OpaqueUserData::OpaqueUserData( - void* data, OpaqueUserData::Deleter deleter) +ABSL_CONST_INIT absl::Mutex type_registry_mutex(absl::kConstInit); + +using TypeRegistry = absl::flat_hash_map; +static TypeRegistry& StaticTypeRegistry() { + static auto* registry = new TypeRegistry(); + return *registry; +} + +ExecutionContext::TypeId ExecutionContext::GetNextTypeId() { + static auto* counter = new std::atomic(1); + return TypeId(counter->fetch_add(1)); +} + +ExecutionContext::UserData::UserData(void* data, Deleter deleter) : data_(data), deleter_(std::move(deleter)) {} -ExecutionContext::OpaqueUserData::~OpaqueUserData() { +ExecutionContext::UserData::~UserData() { if (deleter_) deleter_(data_); } -absl::Status ExecutionContext::Emplace(std::string id, void* data, - OpaqueUserData::Deleter deleter) { - if (!data) return absl::InvalidArgumentError("User data must be not null"); +absl::StatusOr +ExecutionContext::RegisterExternalTypeId(std::string_view name) { + absl::MutexLock lock(&type_registry_mutex); + auto& registry = StaticTypeRegistry(); - auto emplaced = opaque_.emplace( - id, std::make_shared(data, std::move(deleter))); + // Try to emplace with type id zero and fill it with real type id only if we + // successfully acquired an entry for a given name. 
+ auto emplaced = registry.emplace(name, TypeId(0)); if (!emplaced.second) { return absl::AlreadyExistsError( - absl::StrCat("Opaque user data with id ", id, - " already exists in execution context")); + absl::StrCat("Type id ", emplaced.first->second.value(), + " already registered for type name ", name)); } - - return absl::OkStatus(); + return emplaced.first->second = GetNextTypeId(); } -absl::StatusOr> -ExecutionContext::Lookup(std::string_view id) const { - auto it = opaque_.find(id); - if (it == opaque_.end()) { - return absl::NotFoundError(absl::StrCat("Opaque user data with id ", id, - " not found in execution context")); - } - return it->second; +absl::Status ExecutionContext::Insert(TypeId type_id, void* data, + Deleter deleter) { + return InsertUserData(type_id, + std::make_unique(data, std::move(deleter))); } -absl::Status ExecutionContext::Insert(int64_t type_id, - std::shared_ptr data) { +absl::Status ExecutionContext::InsertUserData(TypeId type_id, + std::unique_ptr data) { if (!data) return absl::InvalidArgumentError("User data must be not null"); - auto emplaced = typed_.emplace(type_id, std::move(data)); + auto emplaced = user_data_.emplace(type_id, std::move(data)); if (!emplaced.second) { return absl::AlreadyExistsError( - absl::StrCat("User data with type id ", type_id, + absl::StrCat("User data with type id ", type_id.value(), " already exists in execution context")); } - return absl::OkStatus(); } -absl::StatusOr> -ExecutionContext::Lookup(int64_t type_id) const { - auto it = typed_.find(type_id); - if (it == typed_.end()) { - return absl::NotFoundError(absl::StrCat("User data with type id ", type_id, +absl::StatusOr ExecutionContext::LookupUserData( + TypeId type_id) const { + auto it = user_data_.find(type_id); + if (it == user_data_.end()) { + return absl::NotFoundError(absl::StrCat("User data with type id ", + type_id.value(), " not found in execution context")); } - return it->second; + return it->second.get(); } } // namespace xla::ffi diff --git a/third_party/xla/xla/ffi/execution_context.h b/third_party/xla/xla/ffi/execution_context.h index 21d3c85e1facb4..1a5250913830c4 100644 --- a/third_party/xla/xla/ffi/execution_context.h +++ b/third_party/xla/xla/ffi/execution_context.h @@ -16,110 +16,128 @@ limitations under the License. #ifndef XLA_FFI_EXECUTION_CONTEXT_H_ #define XLA_FFI_EXECUTION_CONTEXT_H_ +#include #include #include #include -#include #include -#include #include #include "absl/container/flat_hash_map.h" #include "absl/status/status.h" #include "absl/status/statusor.h" +#include "tsl/lib/gtl/int_type.h" +#include "tsl/platform/logging.h" +#include "tsl/platform/statusor.h" namespace xla::ffi { // Execution context is a container for forwarding arbitrary user data to FFI -// handlers in the scope of a single execution. Execution context allows to pass -// arbitrary user data to FFI handlers via the side channel that does not -// require modifying HLO modules. There are two kinds of user data that can be -// passed to FFI handlers: +// handlers in the scope of a single XLA execution. Execution context allows to +// pass arbitrary user data to FFI handlers via the side channel that does not +// require modifying HLO modules. // -// 1. Opaque data. This is a wrapper for an opaque user data pointer that is -// useful when FFI handler is registered in the dynamically loaded library -// and we do not know the type of the data and can only work with the opaque -// pointer. 
+// From XLA FFI perspective user data is an opaque pointer that can be +// forwarded to the FFI handler. We rely on a type id to guarantee that we forward +// user data of the correct type. There are two kinds of type ids: // -// 2. Typed data. This is useful when the FFI handler is registered in the same -// process and we can rely on global static variable to assign ids to types -// and we don't need to worry about breaking C++ ABI. +// 1. External type id. When FFI handlers are defined in a dynamically loaded +// library, they must register types used in the execution context ahead +// of time and explicitly get a unique type id for them. // -// For internal FFI handlers we always use typed data, and use opaque data only -// if FFI handler has to be defined in a separate dynamically loaded library. +// 2. Internal type id. When an FFI handler is defined in the same binary we rely +// on a global static registry to automatically assign type ids. // // Examples: an FFI handler can register a per-execution cache in the execution // context and get access to it in the FFI handler, with a guarantee that it is // unique between separate calls to XLA execute. class ExecutionContext { public: - // A base class for typed user data used for FFI handlers registered in the - // same process where we can safely pass around C++ objects. - class UserData { - public: - virtual ~UserData() = default; - }; + template + using Deleter = std::function; + + TSL_LIB_GTL_DEFINE_INT_TYPE(TypeId, int64_t); + + // Registers external type with a given name in a static type registry. + static absl::StatusOr RegisterExternalTypeId(std::string_view name); + + // Inserts opaque user data with a given type id and optional deleter. + absl::Status Insert(TypeId type_id, void* data, + Deleter deleter = nullptr); + // Inserts typed user data of type `T` and optional deleter. template - using IsUserData = std::enable_if_t>; + absl::Status Insert(T* data, Deleter deleter = nullptr); - // An RAII wrapper for opaque user data that is useful when FFI handler is - // registered in the dynamically loaded library and we do not know the type of - // the data and can only work with the opaque pointer. - class OpaqueUserData { - public: - using Deleter = std::function; + // Emplaces typed user data constructed from `args`. Execution context + // becomes the owner of the constructed object. + template + absl::Status Emplace(Args&&... args); + + // Looks up typed execution context data of type `T`. + template + absl::StatusOr Lookup() const { + TF_ASSIGN_OR_RETURN(auto user_data, LookupUserData(GetTypeId())); + return static_cast(user_data->data()); + } + + // Looks up opaque execution context data with given `type_id`. + absl::StatusOr Lookup(TypeId type_id) const { + TF_ASSIGN_OR_RETURN(auto user_data, LookupUserData(type_id)); + return user_data->data(); + } - private: + // An RAII wrapper for opaque user data. Optional deleter will be called when + // UserData is destroyed together with the execution context. If deleter is + // nullptr then the caller is responsible for making sure that the pointer + // stays valid during the XLA execution and is correctly destroyed afterwards.
+ class UserData { + public: + UserData(void* data, Deleter deleter); + ~UserData(); - OpaqueUserData(OpaqueUserData&) = delete; - OpaqueUserData& operator=(const OpaqueUserData&) = delete; + UserData(UserData&) = delete; + UserData& operator=(const UserData&) = delete; void* data() const { return data_; } private: void* data_; - Deleter deleter_; + Deleter deleter_; }; - // Emplaces opaque user data keyed by `id`. - absl::Status Emplace(std::string id, void* data, - OpaqueUserData::Deleter deleter); - - // Looks up opaque user data keyed by `id`. - absl::StatusOr> Lookup( - std::string_view id) const; - - // Emplaces typed user data constructed from `args`. - template * = nullptr> - absl::Status Emplace(Args&&... args) { - return Insert(GetTypeId(), - std::make_shared(std::forward(args)...)); - } + static TypeId GetNextTypeId(); - // Looks up typed execution context data of type `T`. - template * = nullptr> - absl::StatusOr> Lookup() const { - auto user_data = Lookup(GetTypeId()); - if (!user_data.ok()) return user_data.status(); - return std::static_pointer_cast(*std::move(user_data)); - } - - private: - template * = nullptr> - static int64_t GetTypeId() { - static const char id = 0; - return reinterpret_cast(&id); + template + static TypeId GetTypeId() { + static const TypeId id = GetNextTypeId(); + return id; } - absl::Status Insert(int64_t type_id, std::shared_ptr data); - absl::StatusOr> Lookup(int64_t type_id) const; + absl::Status InsertUserData(TypeId type_id, std::unique_ptr data); + absl::StatusOr LookupUserData(TypeId type_id) const; - absl::flat_hash_map> typed_; - absl::flat_hash_map> opaque_; + absl::flat_hash_map> user_data_; }; +template +absl::Status ExecutionContext::Insert(T* data, Deleter deleter) { + return InsertUserData(GetTypeId(), + std::make_unique( + data, [deleter = std::move(deleter)](void* data) { + if (deleter) deleter(static_cast(data)); + })); +} + +template +absl::Status ExecutionContext::Emplace(Args&&... args) { + return InsertUserData(GetTypeId(), + std::make_unique( + new T(std::forward(args)...), + [](void* data) { delete static_cast(data); })); +} + } // namespace xla::ffi #endif // XLA_FFI_EXECUTION_CONTEXT_H_ diff --git a/third_party/xla/xla/ffi/execution_context_test.cc b/third_party/xla/xla/ffi/execution_context_test.cc index 159e529396b7e7..5c4cb3fdaab666 100644 --- a/third_party/xla/xla/ffi/execution_context_test.cc +++ b/third_party/xla/xla/ffi/execution_context_test.cc @@ -25,36 +25,52 @@ limitations under the License. 
namespace xla::ffi { -struct StringUserData { - std::string data; -}; - -struct I32UserData : public ExecutionContext::UserData { +struct I32UserData { explicit I32UserData(int32_t value) : value(value) {} int32_t value; }; -TEST(ExecutionContextTest, OpaqueUserData) { - StringUserData string_data = {"foo"}; - auto deleter = [](void*) {}; +struct StrUserData { + explicit StrUserData(std::string value) : value(value) {} + std::string value; +}; +TEST(ExecutionContextTest, EmplaceUserData) { ExecutionContext context; - TF_ASSERT_OK(context.Emplace("foo", &string_data, deleter)); + TF_ASSERT_OK(context.Emplace(42)); + TF_ASSERT_OK(context.Emplace("hello")); - TF_ASSERT_OK_AND_ASSIGN(auto opaque_data, context.Lookup("foo")); - ASSERT_NE(opaque_data, nullptr); + TF_ASSERT_OK_AND_ASSIGN(auto* i32_data, context.Lookup()); + TF_ASSERT_OK_AND_ASSIGN(auto* str_data, context.Lookup()); - StringUserData* user_data = static_cast(opaque_data->data()); - EXPECT_EQ(user_data, &string_data); + ASSERT_NE(i32_data, nullptr); + ASSERT_NE(str_data, nullptr); + ASSERT_EQ(i32_data->value, 42); + ASSERT_EQ(str_data->value, "hello"); } -TEST(ExecutionContextTest, UserData) { +TEST(ExecutionContextTest, InsertUserOwned) { + I32UserData user_data(42); + ExecutionContext context; - TF_ASSERT_OK(context.Emplace(42)); + TF_ASSERT_OK(context.Insert(&user_data)); - TF_ASSERT_OK_AND_ASSIGN(auto i32_data, context.Lookup()); - ASSERT_NE(i32_data, nullptr); - ASSERT_EQ(i32_data->value, 42); + TF_ASSERT_OK_AND_ASSIGN(auto* i32_data, context.Lookup()); + ASSERT_EQ(i32_data, &user_data); +} + +TEST(ExecutionContextTest, InsertUserOwnedWithTypeId) { + TF_ASSERT_OK_AND_ASSIGN( + ExecutionContext::TypeId type_id, + ExecutionContext::RegisterExternalTypeId("I32UserData")); + + I32UserData user_data(42); + + ExecutionContext context; + TF_ASSERT_OK(context.Insert(type_id, &user_data)); + + TF_ASSERT_OK_AND_ASSIGN(auto* i32_data, context.Lookup(type_id)); + ASSERT_EQ(i32_data, &user_data); } TEST(ExecutionContextTest, UserDataNotFound) { diff --git a/third_party/xla/xla/ffi/ffi.h b/third_party/xla/xla/ffi/ffi.h index 4620f8ef63ccc2..629e0f02071d48 100644 --- a/third_party/xla/xla/ffi/ffi.h +++ b/third_party/xla/xla/ffi/ffi.h @@ -331,7 +331,7 @@ struct UserData {}; template struct CtxDecoding> { - using Type = std::shared_ptr; + using Type = T*; static std::optional Decode(const XLA_FFI_Api* api, XLA_FFI_ExecutionContext* ctx, diff --git a/third_party/xla/xla/ffi/ffi_api.cc b/third_party/xla/xla/ffi/ffi_api.cc index d9b3e3874c897d..73c6c7aba412af 100644 --- a/third_party/xla/xla/ffi/ffi_api.cc +++ b/third_party/xla/xla/ffi/ffi_api.cc @@ -289,21 +289,35 @@ static XLA_FFI_Error* XLA_FFI_Stream_Get(XLA_FFI_Stream_Get_Args* args) { return nullptr; } +static XLA_FFI_Error* XLA_FFI_TypeId_Register( + XLA_FFI_TypeId_Register_Args* args) { + XLA_FFI_RETURN_IF_ERROR(ActualStructSizeIsGreaterOrEqual( + "XLA_FFI_TypeId_Register_Args", + XLA_FFI_TypeId_Register_Args_STRUCT_SIZE, args->struct_size)); + + auto type_id = ExecutionContext::RegisterExternalTypeId( + std::string_view(args->name.ptr, args->name.len)); + if (!type_id.ok()) { + return new XLA_FFI_Error{std::move(type_id).status()}; + } + + args->type_id->type_id = type_id->value(); + return nullptr; +} + static XLA_FFI_Error* XLA_FFI_ExecutionContext_Get( XLA_FFI_ExecutionContext_Get_Args* args) { XLA_FFI_RETURN_IF_ERROR(ActualStructSizeIsGreaterOrEqual( "XLA_FFI_ExecutionContext_Get_Args", XLA_FFI_ExecutionContext_Get_Args_STRUCT_SIZE, args->struct_size)); -
XLA_FFI_RETURN_IF_ERROR(ActualStructSizeIsGreaterOrEqual( - "XLA_FFI_ByteSpan", XLA_FFI_ByteSpan_STRUCT_SIZE, args->id.struct_size)); - auto opaque = args->ctx->execution_context->Lookup( - std::string_view(args->id.ptr, args->id.len)); - if (!opaque.ok()) { - return new XLA_FFI_Error{std::move(opaque).status()}; + auto user_data = args->ctx->execution_context->Lookup( + ExecutionContext::TypeId(args->type_id->type_id)); + if (!user_data.ok()) { + return new XLA_FFI_Error{std::move(user_data).status()}; } - args->data = (*opaque)->data(); + args->data = *user_data; return nullptr; } @@ -360,12 +374,13 @@ static XLA_FFI_Api api = { &internal_api, - XLA_FFI_Error_Create, // creates error - XLA_FFI_Error_GetMessage, // get error message - XLA_FFI_Error_Destroy, // frees error - XLA_FFI_Handler_Register, // registers handler - XLA_FFI_Stream_Get, // returns platform specific stream - XLA_FFI_ExecutionContext_Get, // returns execution context data + XLA_FFI_Error_Create, + XLA_FFI_Error_GetMessage, + XLA_FFI_Error_Destroy, + XLA_FFI_Handler_Register, + XLA_FFI_Stream_Get, + XLA_FFI_TypeId_Register, + XLA_FFI_ExecutionContext_Get, }; const XLA_FFI_Api* GetXlaFfiApi() { return &api; } diff --git a/third_party/xla/xla/ffi/ffi_test.cc b/third_party/xla/xla/ffi/ffi_test.cc index c95e7e3d28026b..f2c05eaf32313d 100644 --- a/third_party/xla/xla/ffi/ffi_test.cc +++ b/third_party/xla/xla/ffi/ffi_test.cc @@ -615,24 +615,24 @@ TEST(FfiTest, RunOptionsCtx) { TF_ASSERT_OK(status); } -struct MyData : public ExecutionContext::UserData { - explicit MyData(std::string str) : str(std::move(str)) {} +struct StrUserData { + explicit StrUserData(std::string str) : str(std::move(str)) {} std::string str; }; TEST(FfiTest, UserData) { ExecutionContext execution_context; - TF_ASSERT_OK(execution_context.Emplace("foo")); + TF_ASSERT_OK(execution_context.Emplace("foo")); CallFrameBuilder builder; auto call_frame = builder.Build(); - auto fn = [&](std::shared_ptr data) { + auto fn = [&](StrUserData* data) { EXPECT_EQ(data->str, "foo"); return absl::OkStatus(); }; - auto handler = Ffi::Bind().Ctx>().To(fn); + auto handler = Ffi::Bind().Ctx>().To(fn); ServiceExecutableRunOptions opts; opts.mutable_run_options()->set_ffi_execution_context(&execution_context); diff --git a/third_party/xla/xla/service/gpu/custom_call_test.cc b/third_party/xla/xla/service/gpu/custom_call_test.cc index d381f4de52f31c..7eb92d96cc1dd8 100644 --- a/third_party/xla/xla/service/gpu/custom_call_test.cc +++ b/third_party/xla/xla/service/gpu/custom_call_test.cc @@ -724,13 +724,13 @@ TEST_F(CustomCallTest, WithCalledComputation) { // Arbitrary user-defined context passed via the execution context side channel // to a custom call handlers. 
-struct SomeExtraContext : public ffi::ExecutionContext::UserData { +struct SomeExtraContext { explicit SomeExtraContext(int32_t value) : value(value) {} int32_t value; }; static absl::Status ExecutionContext(ffi::Result, - std::shared_ptr ctx) { + SomeExtraContext* ctx) { if (ctx->value != 42) return absl::InternalError("Unexpected value"); return absl::OkStatus(); } From 68f1e149ebc32b11529a4306b6e109bd8ef85746 Mon Sep 17 00:00:00 2001 From: David Dunleavy Date: Mon, 13 May 2024 10:05:25 -0700 Subject: [PATCH 030/478] Teach `xla_test` about specific GPU backends Inspired by https://github.com/openxla/xla/pull/11753 PiperOrigin-RevId: 633247596 --- third_party/xla/.kokoro/linux/build.sh | 2 +- third_party/xla/xla/service/gpu/BUILD | 70 ++++++----- third_party/xla/xla/service/gpu/tests/BUILD | 31 ++--- third_party/xla/xla/tests/build_defs.bzl | 109 ++++++++++++++++-- third_party/xla/xla/tools/hlo_opt/BUILD | 2 +- .../xla/xla/tools/multihost_hlo_runner/BUILD | 3 +- 6 files changed, 156 insertions(+), 61 deletions(-) diff --git a/third_party/xla/.kokoro/linux/build.sh b/third_party/xla/.kokoro/linux/build.sh index 87bf19cd7ad71f..2a72099e5f5148 100644 --- a/third_party/xla/.kokoro/linux/build.sh +++ b/third_party/xla/.kokoro/linux/build.sh @@ -54,7 +54,7 @@ RBE_FLAGS="" TARGET_FILTERS="-@local_tsl//tsl/platform:subprocess_test -@local_tsl//tsl/platform/cloud:google_auth_provider_test -@local_tsl//tsl/platform/cloud:oauth_client_test" if is_linux_gpu_job ; then - TAGS_FILTER="$TAGS_FILTER,gpu,requires-gpu-nvidia,-no_gpu" + TAGS_FILTER="$TAGS_FILTER,gpu_any,requires-gpu-nvidia,-no_gpu" # We are currently running XLA presubmits on machines with NVIDIA T4 GPUs, # which have a compute compatibility of 7.5. Se we filter out all the tests diff --git a/third_party/xla/xla/service/gpu/BUILD b/third_party/xla/xla/service/gpu/BUILD index a851ea99b9b182..34f2210e1d1b04 100644 --- a/third_party/xla/xla/service/gpu/BUILD +++ b/third_party/xla/xla/service/gpu/BUILD @@ -1683,7 +1683,7 @@ xla_test( srcs = if_gpu_is_configured(["gemm_algorithm_picker_test.cc"]), backends = [ "gpu_v100", - ] + if_oss(["gpu"]), + ] + if_oss(["gpu_any"]), deps = [ ":autotuner_util", ":backend_configs_cc", @@ -1976,7 +1976,7 @@ xla_test( srcs = if_gpu_is_configured(["conv_algorithm_picker_test.cc"]), backends = [ "gpu_v100", - ] + if_oss(["gpu"]), + ] + if_oss(["gpu_any"]), tags = [ "noasan", "nomsan", @@ -4636,7 +4636,7 @@ xla_test( }, backends = [ "gpu_a100", - ] + if_oss(["gpu"]), + ] + if_oss(["gpu_any"]), local_defines = if_cuda_is_configured(["GOOGLE_CUDA=1"]), shard_count = 10, deps = [ @@ -5299,19 +5299,22 @@ xla_test( "gpu_p100", "gpu_v100", ], - deps = if_cuda_is_configured([ - ":dot_operand_converter", - "@com_google_googletest//:gtest", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/strings:string_view", - "//xla:shape_util", - "//xla/hlo/ir:hlo", - "//xla/hlo/utils:hlo_matchers", - "//xla/service:pattern_matcher", - "//xla/tests:hlo_test_base", - "//xla/tests:xla_internal_test_main", - "@local_tsl//tsl/platform:statusor", - ]), + deps = if_cuda_is_configured( + [ + ":dot_operand_converter", + "@com_google_googletest//:gtest", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:string_view", + "//xla:shape_util", + "//xla/hlo/ir:hlo", + "//xla/hlo/utils:hlo_matchers", + "//xla/service:pattern_matcher", + "//xla/tests:hlo_test_base", + "//xla/tests:xla_internal_test_main", + "@local_tsl//tsl/platform:statusor", + ], + ["@local_tsl//tsl/platform:test_main"], # b/317293391 + ), ) 
cc_library( @@ -5848,28 +5851,31 @@ xla_cc_test( xla_test( name = "determinism_test", - srcs = ["determinism_test.cc"], + srcs = if_gpu_is_configured(["determinism_test.cc"]), backends = [ "gpu_a100", ], local_defines = if_cuda_is_configured(["GOOGLE_CUDA=1"]) + if_rocm_is_configured([ "TENSORFLOW_USE_ROCM=1", ]), - deps = [ - ":autotuner_util", - "//xla:literal", - "//xla:xla_proto_cc", - "//xla/hlo/ir:hlo", - "//xla/service/gpu/tests:gpu_codegen_test", - "//xla/stream_executor:device_description", - "//xla/stream_executor/gpu:gpu_timer", - "//xla/tests:hlo_test_base", - "//xla/tests:literal_test_util", - "//xla/tests:test_utils", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest_main", - "@local_tsl//tsl/platform:statusor", - ], + deps = if_gpu_is_configured( + [ + ":autotuner_util", + "@com_google_googletest//:gtest_main", + "@com_google_absl//absl/strings", + "//xla:literal", + "//xla:xla_proto_cc", + "//xla/hlo/ir:hlo", + "//xla/service/gpu/tests:gpu_codegen_test", + "//xla/stream_executor:device_description", + "//xla/stream_executor/gpu:gpu_timer", + "//xla/tests:hlo_test_base", + "//xla/tests:literal_test_util", + "//xla/tests:test_utils", + "@local_tsl//tsl/platform:statusor", + ], + ["@local_tsl//tsl/platform:test_main"], # b/317293391 + ), ) cc_library( diff --git a/third_party/xla/xla/service/gpu/tests/BUILD b/third_party/xla/xla/service/gpu/tests/BUILD index cc7597cbaf63f6..8b11e1de46be2f 100644 --- a/third_party/xla/xla/service/gpu/tests/BUILD +++ b/third_party/xla/xla/service/gpu/tests/BUILD @@ -530,7 +530,7 @@ xla_test( srcs = ["gpu_kernel_tiling_test.cc"], backends = [ "gpu_p100", - ] + if_oss(["gpu"]), + ] + if_oss(["gpu_any"]), deps = [ ":gpu_codegen_test", "//xla:error_spec", @@ -820,7 +820,7 @@ lit_test_suite( "//xla/tools/hlo_opt:gpu_specs/p100.txtpb", "//xla/tools/hlo_opt:gpu_specs/v100.txtpb", ], - default_tags = tf_cuda_tests_tags(), + default_tags = tf_cuda_tests_tags() + ["gpu_any"], tags_override = { "element_wise_row_vectorization.hlo": ["no_rocm"], "scatter_bf16.hlo": ["no_rocm"], @@ -910,7 +910,7 @@ xla_test( srcs = ["tensor_float_32_global_var_test.cc"], backends = [ "gpu_a100", - ] + if_oss(["gpu"]), + ] + if_oss(["gpu_any"]), deps = [ "//xla:error_spec", "//xla/tests:hlo_test_base", @@ -921,21 +921,24 @@ xla_test( xla_test( name = "gpu_sparse_dot_test", - srcs = ["gpu_sparse_dot_test.cc"], + srcs = if_cuda_is_configured(["gpu_sparse_dot_test.cc"]), backends = [ "gpu_a100", "gpu_h100", ], - deps = [ - ":gpu_codegen_test", - "//third_party/half:includes", - "//xla:literal", - "//xla:literal_util", - "//xla/tests:xla_internal_test_main", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/types:span", - "@com_google_googletest//:gtest", - ], + deps = if_cuda_is_configured( + [ + ":gpu_codegen_test", + "@com_google_googletest//:gtest", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:span", + "//third_party/half:includes", + "//xla:literal", + "//xla:literal_util", + "//xla/tests:xla_internal_test_main", + ], + ["@local_tsl//tsl/platform:test_main"], # b/317293391 + ), ) xla_test( diff --git a/third_party/xla/xla/tests/build_defs.bzl b/third_party/xla/xla/tests/build_defs.bzl index 7674095cb20e68..ae74353e783306 100644 --- a/third_party/xla/xla/tests/build_defs.bzl +++ b/third_party/xla/xla/tests/build_defs.bzl @@ -11,7 +11,81 @@ load( ) load("//xla/tests:plugin.bzl", "plugins") -all_backends = ["cpu", "gpu"] + list(plugins.keys()) +# Possible backend values for the GPU family. 
+GPU_BACKENDS = [ + "gpu_any", + "gpu_p100", + "gpu_v100", + "gpu_a100", + "gpu_h100", +] + +# The generic "gpu" backend includes the actual backends in this list. +GPU_DEFAULT_BACKENDS = [ + "gpu_any", + "gpu_a100", + "gpu_h100", +] + +_DEFAULT_BACKENDS = ["cpu"] + GPU_DEFAULT_BACKENDS + +_ALL_BACKENDS = ["cpu"] + GPU_BACKENDS + list(plugins.keys()) + +# buildifier: disable=function-docstring +def prepare_gpu_backend_data(backends, disabled_backends, backend_tags, backend_args): + # Expand "gpu" backend name into device specific backend names. + new_backends = [name for name in backends if name != "gpu"] + if len(new_backends) < len(backends): + new_backends.extend(GPU_DEFAULT_BACKENDS) + + new_disabled_backends = [name for name in disabled_backends if name != "gpu"] + if len(new_disabled_backends) < len(disabled_backends): + new_disabled_backends.extend(GPU_BACKENDS) + + new_backend_tags = {key: value for key, value in backend_tags.items() if key != "gpu"} + gpu_backend_tags = backend_tags.get("gpu", []) + for key in GPU_BACKENDS: + new_backend_tags.setdefault(key, gpu_backend_tags[:]) + + new_backend_args = {key: value for key, value in backend_args.items() if key != "gpu"} + if "gpu" in backend_args: + for key in GPU_BACKENDS: + new_backend_args.setdefault(key, backend_args["gpu"]) + + # Disable backends that don't meet the device requirements. + sm_requirements = { + "gpu_any": (0, 0), + "gpu_p100": (6, 0), + "gpu_v100": (7, 0), + "gpu_a100": (8, 0), + "gpu_h100": (9, 0), + } + for gpu_backend in GPU_BACKENDS: + all_tags = new_backend_tags[gpu_backend] + requires_gpu = [t for t in all_tags if t.startswith("requires-gpu-")] + requires_sm, only = None, False + for tag in requires_gpu: + if tag.startswith("requires-gpu-sm"): + version = tag.split("-")[2][2:] + sm = (int(version[:-1]), int(version[-1])) + if not requires_sm or sm < requires_sm: + requires_sm = sm + if tag.endswith("-only"): + only = True + if only: + disable = requires_sm != sm_requirements[gpu_backend] + else: + disable = requires_sm and requires_sm > sm_requirements[gpu_backend] + + if disable: + new_disabled_backends.append(gpu_backend) + else: + sm_major, sm_minor = sm_requirements[gpu_backend] + sm_tag = "requires-gpu-nvidia" if sm_major == 0 else "requires-gpu-sm%s%s-only" % (sm_major, sm_minor) + new_backend_tags[gpu_backend] = [t for t in all_tags if t not in requires_gpu] + new_backend_tags[gpu_backend].append(sm_tag) + + return new_backends, new_disabled_backends, new_backend_tags, new_backend_args def xla_test( name, @@ -94,7 +168,11 @@ def xla_test( test_names = [] if not backends: - backends = all_backends + backends = _DEFAULT_BACKENDS + + # Expand "gpu" backend name to specific GPU backends and update tags. 
+ backends, disabled_backends, backend_tags, backend_args = \ + prepare_gpu_backend_data(backends, disabled_backends, backend_tags, backend_args) backends = [ backend @@ -108,15 +186,20 @@ def xla_test( this_backend_copts = [] this_backend_args = backend_args.get(backend, []) this_backend_data = [] + backend_deps = [] if backend == "cpu": - backend_deps = ["//xla/service:cpu_plugin"] - backend_deps += ["//xla/tests:test_macros_cpu"] # buildifier: disable=list-append - elif backend == "gpu": - backend_deps = if_gpu_is_configured(["//xla/service:gpu_plugin"]) - backend_deps += if_gpu_is_configured(["//xla/tests:test_macros_gpu"]) # buildifier: disable=list-append + backend_deps += [ + "//xla/service:cpu_plugin", + "//xla/tests:test_macros_cpu", + ] + elif backend in GPU_BACKENDS: + backend_deps += if_gpu_is_configured([ + "//xla/service:gpu_plugin", + "//xla/tests:test_macros_%s" % backend, + ]) this_backend_tags += tf_gpu_tests_tags() + this_backend_copts.append("-DXLA_TEST_BACKEND_GPU=1") elif backend in plugins: - backend_deps = [] backend_deps += plugins[backend]["deps"] this_backend_copts += plugins[backend]["copts"] this_backend_tags += plugins[backend]["tags"] @@ -197,11 +280,13 @@ def xla_test_library( """ if not backends: - backends = all_backends + backends = _ALL_BACKENDS for backend in backends: this_backend_copts = [] - if backend in ["cpu", "gpu"]: + if backend == "cpu": + backend_deps = ["//xla/tests:test_macros_cpu"] + elif backend in GPU_BACKENDS: backend_deps = ["//xla/tests:test_macros_%s" % backend] elif backend in plugins: backend_deps = plugins[backend]["deps"] @@ -230,7 +315,7 @@ def generate_backend_suites(backends = []): # buildifier: disable=unnamed-macro """ if not backends: - backends = all_backends + backends = _ALL_BACKENDS for backend in backends: native.test_suite( name = "%s_tests" % backend, @@ -244,7 +329,7 @@ def generate_backend_test_macros(backends = []): # buildifier: disable=unnamed- backends: The list of backends to generate libraries for. """ if not backends: - backends = all_backends + backends = _ALL_BACKENDS for backend in backends: manifest = "" if backend in plugins: diff --git a/third_party/xla/xla/tools/hlo_opt/BUILD b/third_party/xla/xla/tools/hlo_opt/BUILD index dd7f62731a27f9..1e042b61849b5d 100644 --- a/third_party/xla/xla/tools/hlo_opt/BUILD +++ b/third_party/xla/xla/tools/hlo_opt/BUILD @@ -176,7 +176,7 @@ lit_test_suite( ]), cfg = "//xla:lit.cfg.py", data = [":test_utilities"], - default_tags = tf_cuda_tests_tags(), + default_tags = tf_cuda_tests_tags() + ["gpu_any"], tags_override = { "gpu_hlo_ptx.hlo": ["no_rocm"], }, diff --git a/third_party/xla/xla/tools/multihost_hlo_runner/BUILD b/third_party/xla/xla/tools/multihost_hlo_runner/BUILD index 5cdc1ce69ca7a4..eb9b29fa9a3364 100644 --- a/third_party/xla/xla/tools/multihost_hlo_runner/BUILD +++ b/third_party/xla/xla/tools/multihost_hlo_runner/BUILD @@ -16,7 +16,7 @@ build_test( name = "hlo_runner_main_build_test", tags = [ "cpu", - "gpu", + "gpu_any", ], targets = [ ":hlo_runner_main", @@ -29,6 +29,7 @@ xla_cc_binary( srcs = ["hlo_runner_main.cc"], tags = [ "gpu", + "gpu_any", "noasan", # Exceeds linker limit. "nomac", ], From 9e28a4c10ee0673c43990ef3802113f27ef317b8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 May 2024 10:14:07 -0700 Subject: [PATCH 031/478] Fix typo in TFLite conversion doc. 
PiperOrigin-RevId: 633250884 --- tensorflow/lite/g3doc/models/convert/index.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/g3doc/models/convert/index.md b/tensorflow/lite/g3doc/models/convert/index.md index 094758708a0faa..5aee45c5c1ae1f 100644 --- a/tensorflow/lite/g3doc/models/convert/index.md +++ b/tensorflow/lite/g3doc/models/convert/index.md @@ -97,7 +97,7 @@ for your model: 1. [Optimization flags](../../performance/model_optimization) allow you to specify the type of optimization to apply during conversion. The most commonly used optimization technique is - [post-training quanitization](). + [post-training quantization](). 1. [Metadata flags](metadata) allow you to add metadata to the converted model which makes it easier to create platform specific wrapper code when deploying models on devices. @@ -142,7 +142,7 @@ format model and a custom runtime environment for that model. converting your model. * See the [optimization overview](../../performance/model_optimization) for guidance on how to optimize your converted model using techniques like - [post-training quanitization](../../performance/post_training_quantization). + [post-training quantization](../../performance/post_training_quantization). * See the [Adding metadata overview](metadata) to learn how to add metadata to your models. Metadata provides other uses a description of your model as well as information that can be leveraged by code generators. From cafd38d0cda9d074cc7b9a5deb817ef73352a946 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Mon, 13 May 2024 10:27:27 -0700 Subject: [PATCH 032/478] [pjrt][xla:ffi] Add XLA FFI execution context to PjRt ExecuteContext - Extend PjRt ExecuteContext base class to always include XLA FFI execution context so that end-users can pass data to FFI handlers. - Add an end-to-end test to SE GPU PjrtClient. PJRT C API changes will be in the followup PRs. 
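In concrete terms, the intended flow looks roughly like the sketch below. It is condensed from the ForwardUserDataToFfiHandler test added in this patch; MyState, MyHandler, RunWithUserData, and the "CUDA" platform string are illustrative assumptions, while ExecuteContext::ffi_context(), ExecuteOptions::context, and the ffi::UserData context binding are the pieces this change adds or reuses:

  // Hedged sketch, not the full test: the handler's buffer arguments and
  // results are omitted, and "CUDA" is an assumed platform name.
  struct MyState {
    explicit MyState(float v) : value(v) {}
    float value;
  };

  // Handler side: MyState* arrives through the execution context channel.
  static absl::Status MyHandler(MyState* state) {
    return state->value == 42.0f ? absl::OkStatus()
                                 : absl::InternalError("unexpected value");
  }

  XLA_FFI_DEFINE_HANDLER(kMyHandler, MyHandler,
                         ffi::Ffi::Bind().Ctx<ffi::UserData<MyState>>());
  XLA_FFI_REGISTER_HANDLER(ffi::GetXlaFfiApi(), "MyHandler", "CUDA",
                           kMyHandler);

  // Client side: attach the payload to a single call to Execute().
  absl::Status RunWithUserData(PjRtLoadedExecutable* executable) {
    ExecuteContext context;
    TF_RETURN_IF_ERROR(context.ffi_context().Emplace<MyState>(42.0f));

    ExecuteOptions opts;
    opts.context = &context;  // caller keeps `context` alive for the run
    return executable->Execute(/*argument_handles=*/{{}}, opts).status();
  }

The lifetime comment mirrors the new contract on ExecuteOptions::context: the context is borrowed, so the caller must keep it valid for the duration of the execution.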
PiperOrigin-RevId: 633255392 --- third_party/xla/xla/pjrt/BUILD | 1 + third_party/xla/xla/pjrt/gpu/BUILD | 5 ++ .../xla/pjrt/gpu/se_gpu_pjrt_client_test.cc | 62 +++++++++++++++++++ third_party/xla/xla/pjrt/pjrt_executable.h | 14 ++++- .../xla/pjrt/pjrt_stream_executor_client.cc | 4 +- .../xla/xla/service/gpu/gpu_executable.cc | 11 +++- third_party/xla/xla/service/gpu/runtime/BUILD | 1 + .../gpu/runtime/command_buffer_thunk.cc | 2 +- .../service/gpu/runtime/custom_call_thunk.cc | 3 +- .../xla/xla/service/gpu/runtime/thunk.cc | 6 +- .../xla/xla/service/gpu/runtime/thunk.h | 8 +++ 11 files changed, 109 insertions(+), 8 deletions(-) diff --git a/third_party/xla/xla/pjrt/BUILD b/third_party/xla/xla/pjrt/BUILD index 7e6c924261a19b..662c77584df5e7 100644 --- a/third_party/xla/xla/pjrt/BUILD +++ b/third_party/xla/xla/pjrt/BUILD @@ -252,6 +252,7 @@ cc_library( "//xla:xla_data_proto_cc", "//xla:xla_proto_cc", "//xla/client:executable_build_options", + "//xla/ffi:execution_context", "//xla/hlo/ir:hlo", "//xla/service:compiler", "//xla/service:computation_layout", diff --git a/third_party/xla/xla/pjrt/gpu/BUILD b/third_party/xla/xla/pjrt/gpu/BUILD index 9aec3e3b52cbf8..f8991a93a189e5 100644 --- a/third_party/xla/xla/pjrt/gpu/BUILD +++ b/third_party/xla/xla/pjrt/gpu/BUILD @@ -145,7 +145,10 @@ xla_cc_test( "//xla:status_macros", "//xla:statusor", "//xla:test", + "//xla:xla_data_proto_cc", "//xla/client:xla_computation", + "//xla/ffi", + "//xla/ffi:ffi_api", "//xla/pjrt:pjrt_client", "//xla/pjrt:pjrt_executable", "//xla/pjrt:pjrt_future", @@ -153,6 +156,8 @@ xla_cc_test( "//xla/pjrt/distributed:in_memory_key_value_store", "//xla/service:gpu_plugin", "//xla/service:hlo_parser", + "//xla/service:platform_util", + "//xla/stream_executor", "//xla/tests:literal_test_util", "@com_google_absl//absl/log:check", "@com_google_absl//absl/status", diff --git a/third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_client_test.cc b/third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_client_test.cc index 038c3cb0552bb4..5a14973d215cc2 100644 --- a/third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_client_test.cc +++ b/third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_client_test.cc @@ -19,6 +19,7 @@ limitations under the License. #include #include +#include #include #include #include @@ -34,6 +35,8 @@ limitations under the License. #include "absl/time/clock.h" #include "absl/time/time.h" #include "xla/client/xla_computation.h" +#include "xla/ffi/ffi.h" +#include "xla/ffi/ffi_api.h" #include "xla/literal.h" #include "xla/literal_util.h" #include "xla/pjrt/distributed/in_memory_key_value_store.h" @@ -43,12 +46,15 @@ limitations under the License. #include "xla/pjrt/pjrt_future.h" #include "xla/pjrt/pjrt_stream_executor_client.h" #include "xla/service/hlo_parser.h" +#include "xla/service/platform_util.h" #include "xla/shape.h" #include "xla/shape_util.h" #include "xla/status_macros.h" #include "xla/statusor.h" +#include "xla/stream_executor/stream.h" #include "xla/test.h" #include "xla/tests/literal_test_util.h" +#include "xla/xla_data.pb.h" #include "tsl/lib/core/status_test_util.h" #include "tsl/platform/env.h" #include "tsl/platform/errors.h" @@ -257,6 +263,62 @@ TEST(StreamExecutorGpuClientTest, RecvErrorNoDeadLock) { "of size 8 (0 already transferred)")); } +// User-defined data type to be passed to FFI handler via the execute context +// side channel. 
+struct MemsetValue { + explicit MemsetValue(float value) : value(value) {} + float value; +}; + +static absl::Status MemsetFromValue( + se::Stream* stream, ffi::Result> result, + MemsetValue* memset_value) { + uint32_t pattern; + std::memcpy(&pattern, &memset_value->value, sizeof(pattern)); + + se::DeviceMemoryBase base = result->data; + return stream->Memset32(&base, pattern, result->data.size()); +} + +XLA_FFI_DEFINE_HANDLER(kMemsetFromValue, MemsetFromValue, + ffi::Ffi::Bind() + .Ctx() + .Ret>() + .Ctx>()); + +XLA_FFI_REGISTER_HANDLER(ffi::GetXlaFfiApi(), "MemsetFromValue", + PlatformUtil::CanonicalPlatformName("GPU").value(), + kMemsetFromValue); + +TEST(StreamExecutorGpuClientTest, ForwardUserDataToFfiHandler) { + static constexpr char const* kProgram = R"( + HloModule ffi_handler + ENTRY main { + ROOT %custom-call = f32[4] custom-call(), + custom_call_target="MemsetFromValue", + api_version=API_VERSION_TYPED_FFI + })"; + + TF_ASSERT_OK_AND_ASSIGN(auto client, + GetStreamExecutorGpuClient(GpuClientOptions())); + TF_ASSERT_OK_AND_ASSIGN(auto executable, + CompileExecutable(kProgram, *client)); + + ExecuteContext context; + TF_ASSERT_OK(context.ffi_context().Emplace(42.0f)); + + ExecuteOptions opts; + opts.context = &context; + + auto result = executable->Execute(/*argument_handles=*/{{}}, opts); + + TF_ASSERT_OK_AND_ASSIGN(std::shared_ptr result_literal, + ExtractSingleResult(result)); + EXPECT_TRUE(LiteralTestUtil::Equal( + LiteralUtil::CreateR1({42.0f, 42.0f, 42.0f, 42.0f}), + *result_literal)); +} + TEST(StreamExecutorGpuClientTest, ToLiteralAsync) { TF_ASSERT_OK_AND_ASSIGN(auto client, GetStreamExecutorGpuClient(GpuClientOptions())); diff --git a/third_party/xla/xla/pjrt/pjrt_executable.h b/third_party/xla/xla/pjrt/pjrt_executable.h index e4477cfde66f8f..176d1c11342007 100644 --- a/third_party/xla/xla/pjrt/pjrt_executable.h +++ b/third_party/xla/xla/pjrt/pjrt_executable.h @@ -32,6 +32,7 @@ limitations under the License. #include "absl/strings/string_view.h" #include "absl/types/span.h" #include "xla/client/executable_build_options.h" +#include "xla/ffi/execution_context.h" #include "xla/hlo/ir/hlo_module.h" #include "xla/layout.h" #include "xla/pjrt/compile_options.pb.h" @@ -152,6 +153,15 @@ struct LoadOptions { class ExecuteContext { public: virtual ~ExecuteContext() = default; + + ffi::ExecutionContext& ffi_context() { return ffi_context_; } + const ffi::ExecutionContext& ffi_context() const { return ffi_context_; } + + private: + // XLA FFI execution context is a mechanism to attach arbitrary user data to + // a particular call of PjRtLoadedExecutable::Execute and forward it to custom + // calls implemented as XLA FFI handlers. + ffi::ExecutionContext ffi_context_; }; struct PjRtTransferMetadata { @@ -213,7 +223,9 @@ struct ExecuteOptions { // the launch IDs may be used by the runtime to detect the mismatch. int32_t launch_id = 0; // If non-null, an opaque context passed to an execution that may be used to - // supply additional arguments to a derived class of PjRtExecutable. + // supply additional arguments to a derived class of PjRtExecutable. It is + // a caller responsibility to ensure that the context is valid for the + // duration of the execution. const ExecuteContext* context = nullptr; // If true, check that the PjRtBuffer argument shapes match the compiled // shapes. Otherwise, any shape with the right size on device may be passed. 
diff --git a/third_party/xla/xla/pjrt/pjrt_stream_executor_client.cc b/third_party/xla/xla/pjrt/pjrt_stream_executor_client.cc index c726474f054c00..b59fd9f145de05 100644 --- a/third_party/xla/xla/pjrt/pjrt_stream_executor_client.cc +++ b/third_party/xla/xla/pjrt/pjrt_stream_executor_client.cc @@ -2772,7 +2772,9 @@ PjRtStreamExecutorLoadedExecutable::EnqueueExecution( if (run_options.launch_id() != 0) { VLOG(3) << "launch id for " << name() << ": " << run_options.launch_id(); } - + if (options.context != nullptr) { + run_options.set_ffi_execution_context(&options.context->ffi_context()); + } // The choice of where we wait is arbitrary; the reason for the wait is // pacing to avoid problems such as memory fragmentation and running ahead // too far, not for correctness. Placing it before the executable launch diff --git a/third_party/xla/xla/service/gpu/gpu_executable.cc b/third_party/xla/xla/service/gpu/gpu_executable.cc index 4c56c84cdadbe8..c4c55a0aed85af 100644 --- a/third_party/xla/xla/service/gpu/gpu_executable.cc +++ b/third_party/xla/xla/service/gpu/gpu_executable.cc @@ -471,9 +471,14 @@ absl::Status ExecuteThunks( { // Initialize thunks using prepared resources before execution. Thunk::InitializeParams initialize_params{ - executor, executable_source, &buffer_allocations, - main_stream, command_buffer_trace_stream, &collective_params, - &collective_cliques}; + executor, + executable_source, + &buffer_allocations, + main_stream, + command_buffer_trace_stream, + &collective_params, + &collective_cliques, + run_options->run_options().ffi_execution_context()}; tsl::profiler::TraceMe trace([&] { return "Thunks::Initialize"; }); for (const std::unique_ptr& thunk : thunk_sequence) { diff --git a/third_party/xla/xla/service/gpu/runtime/BUILD b/third_party/xla/xla/service/gpu/runtime/BUILD index b648e175da5b62..d5ad69a37b933c 100644 --- a/third_party/xla/xla/service/gpu/runtime/BUILD +++ b/third_party/xla/xla/service/gpu/runtime/BUILD @@ -1093,6 +1093,7 @@ cc_library( ":nccl_clique", ":nccl_clique_key", "//xla:executable_run_options", + "//xla/ffi:execution_context", "//xla/hlo/ir:hlo", "//xla/service:buffer_assignment", "//xla/service:executable", diff --git a/third_party/xla/xla/service/gpu/runtime/command_buffer_thunk.cc b/third_party/xla/xla/service/gpu/runtime/command_buffer_thunk.cc index 2447d279f4ad76..4e74fdcd821c8b 100644 --- a/third_party/xla/xla/service/gpu/runtime/command_buffer_thunk.cc +++ b/third_party/xla/xla/service/gpu/runtime/command_buffer_thunk.cc @@ -153,7 +153,7 @@ absl::Status CommandBufferThunk::Initialize(const InitializeParams& params) { params.collective_cliques, /*device_to_host_stream=*/nullptr, /*host_to_device_stream=*/nullptr, /*send_device_memory_function=*/nullptr, - /*recv_device_memory_function=*/nullptr); + /*recv_device_memory_function=*/nullptr, params.ffi_execution_context); // If command buffer is in `kCreate` state it means that command buffer // sequence was never recorded into it. 
We initialize all command buffers diff --git a/third_party/xla/xla/service/gpu/runtime/custom_call_thunk.cc b/third_party/xla/xla/service/gpu/runtime/custom_call_thunk.cc index 46656a65cccac5..9c25e984252519 100644 --- a/third_party/xla/xla/service/gpu/runtime/custom_call_thunk.cc +++ b/third_party/xla/xla/service/gpu/runtime/custom_call_thunk.cc @@ -150,12 +150,13 @@ absl::Status CustomCallThunk::ExecuteFfiHandler(const ExecuteParams& params) { builder.AddAttributes(attrs.Build()); CallFrame call_frame = builder.Build(); - // TODO(ezhulenev): Remove `ServiceExecutableRunOptions` from FFI handler + // TODO(b/340104720): Remove `ServiceExecutableRunOptions` from FFI handler // execution context, as apparently it's not easily accessible from Thunk. ExecutableRunOptions run_options; run_options.set_stream(params.stream); run_options.set_allocator(params.buffer_allocations->memory_allocator()); run_options.set_device_ordinal(params.buffer_allocations->device_ordinal()); + run_options.set_ffi_execution_context(params.ffi_execution_context); ServiceExecutableRunOptions service_run_options(run_options); CallOptions options = {&service_run_options, called_computation_}; diff --git a/third_party/xla/xla/service/gpu/runtime/thunk.cc b/third_party/xla/xla/service/gpu/runtime/thunk.cc index 402890a1bc6a95..f066aed07aaa94 100644 --- a/third_party/xla/xla/service/gpu/runtime/thunk.cc +++ b/third_party/xla/xla/service/gpu/runtime/thunk.cc @@ -33,6 +33,7 @@ limitations under the License. #include "absl/types/span.h" #include "mlir/IR/Operation.h" // from @llvm-project #include "xla/executable_run_options.h" +#include "xla/ffi/execution_context.h" #include "xla/hlo/ir/hlo_instruction.h" #include "xla/service/global_device_id.h" #include "xla/service/gpu/backend_configs.pb.h" @@ -185,6 +186,7 @@ Thunk::ExecuteParams Thunk::ExecuteParams::Create( run_options.run_options().host_to_device_stream(), run_options.run_options().send_device_memory_function(), run_options.run_options().recv_device_memory_function(), + run_options.run_options().ffi_execution_context(), additional_compute_streams); } @@ -196,7 +198,7 @@ Thunk::ExecuteParams Thunk::ExecuteParams::CloneWithNewAllocations( params.collective_params, params.collective_cliques, params.device_to_host_stream, params.host_to_device_stream, params.send_device_memory_function, params.recv_device_memory_function, - params.additional_compute_streams); + params.ffi_execution_context, params.additional_compute_streams); } Thunk::ExecuteParams::ExecuteParams( @@ -207,6 +209,7 @@ Thunk::ExecuteParams::ExecuteParams( se::Stream* host_to_device_stream, SendDeviceMemoryFunction* send_device_memory_function, RecvDeviceMemoryFunction* recv_device_memory_function, + const ffi::ExecutionContext* ffi_execution_context, ExecutionStreamIdMap additional_compute_streams) : buffer_allocations(buffer_allocations), stream(stream), @@ -217,6 +220,7 @@ Thunk::ExecuteParams::ExecuteParams( host_to_device_stream(host_to_device_stream), send_device_memory_function(send_device_memory_function), recv_device_memory_function(recv_device_memory_function), + ffi_execution_context(ffi_execution_context), additional_compute_streams(additional_compute_streams) {} //===----------------------------------------------------------------------===// diff --git a/third_party/xla/xla/service/gpu/runtime/thunk.h b/third_party/xla/xla/service/gpu/runtime/thunk.h index 1df2405de6da84..7acd446896fe91 100644 --- a/third_party/xla/xla/service/gpu/runtime/thunk.h +++ 
b/third_party/xla/xla/service/gpu/runtime/thunk.h @@ -34,6 +34,7 @@ limitations under the License. #include "absl/types/span.h" #include "mlir/IR/Operation.h" // from @llvm-project #include "xla/executable_run_options.h" +#include "xla/ffi/execution_context.h" #include "xla/hlo/ir/hlo_instruction.h" #include "xla/service/buffer_assignment.h" #include "xla/service/global_device_id.h" @@ -327,6 +328,9 @@ class Thunk { // Collective cliques acquired based on resource requests. CollectiveCliques* collective_cliques = nullptr; + + // XLA FFI execution context. + const ffi::ExecutionContext* ffi_execution_context = nullptr; }; //===--------------------------------------------------------------------===// @@ -376,6 +380,9 @@ class Thunk { SendDeviceMemoryFunction* send_device_memory_function; RecvDeviceMemoryFunction* recv_device_memory_function; + // XLA FFI execution context. + const ffi::ExecutionContext* ffi_execution_context; + // Additional compute streams on which thunks launch operations. ExecutionStreamIdMap additional_compute_streams; @@ -390,6 +397,7 @@ class Thunk { se::Stream* host_to_device_stream, SendDeviceMemoryFunction* send_device_memory_function, RecvDeviceMemoryFunction* recv_device_memory_function, + const ffi::ExecutionContext* ffi_execution_context, ExecutionStreamIdMap additional_compute_streams = {}); }; From 3adb649d6f8f21d657d7fa22036f89baf324c06a Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Mon, 13 May 2024 10:41:29 -0700 Subject: [PATCH 033/478] [XLA] [NFC] Factor out MaybeOwning into a separate class. PiperOrigin-RevId: 633260396 --- third_party/xla/xla/BUILD | 1 + third_party/xla/xla/literal.cc | 6 +- third_party/xla/xla/literal.h | 89 +-------------- third_party/xla/xla/maybe_owning.h | 107 ++++++++++++++++++ .../xla/xla/service/gpu/gpu_compiler.cc | 105 ++++------------- third_party/xla/xla/util_test.cc | 25 ++++ 6 files changed, 162 insertions(+), 171 deletions(-) create mode 100644 third_party/xla/xla/maybe_owning.h diff --git a/third_party/xla/xla/BUILD b/third_party/xla/xla/BUILD index b22f7021257508..2219ec3a37e78c 100644 --- a/third_party/xla/xla/BUILD +++ b/third_party/xla/xla/BUILD @@ -341,6 +341,7 @@ cc_library( hdrs = [ "iterator_util.h", "map_util.h", + "maybe_owning.h", "overflow_util.h", "util.h", ], diff --git a/third_party/xla/xla/literal.cc b/third_party/xla/xla/literal.cc index 9454cdc4ec2c8c..d7329d76b37120 100644 --- a/third_party/xla/xla/literal.cc +++ b/third_party/xla/xla/literal.cc @@ -223,7 +223,11 @@ std::ostream& operator<<(std::ostream& out, const Literal& literal) { Shape* MutableLiteralBase::mutable_shape_do_not_use() { const Shape* const_shape = shape_.get(); - Shape* shape = shape_.get_mutable(/*ensure_owned=*/true); + if (!shape_.OwnsPtr()) { + shape_ = MaybeOwningShapePtr(std::make_unique(*shape_)); + } + Shape* shape = shape_.get_mutable(); + if (shape != const_shape) { std::function set_piece_shapes = [&set_piece_shapes](const Shape& shape, Piece* piece) { diff --git a/third_party/xla/xla/literal.h b/third_party/xla/xla/literal.h index 8ba5163cfc7dc0..4f8ce8e57fa3b0 100644 --- a/third_party/xla/xla/literal.h +++ b/third_party/xla/xla/literal.h @@ -44,6 +44,7 @@ limitations under the License. 
#include "xla/index_util.h" #include "xla/layout.h" #include "xla/layout_util.h" +#include "xla/maybe_owning.h" #include "xla/primitive_util.h" #include "xla/printer.h" #include "xla/shape.h" @@ -1302,96 +1303,10 @@ class MutableLiteralBase : public LiteralBase { absl::Span dest_base, absl::Span copy_size); - // A unique_ptr like class which may or may not have ownership of its pointer. // The literal may or may not own the storage of the shape. Creating/copying a // shape can incur significant overhead which in many case we'd like to avoid, // esp. for small literals. - class MaybeOwningShapePtr { - public: - MaybeOwningShapePtr() = default; - explicit MaybeOwningShapePtr(std::unique_ptr unique) - : ptr_and_owning_bit_(TakeUnique(std::move(unique))) {} - - explicit MaybeOwningShapePtr(const Shape* borrowed) - : ptr_and_owning_bit_(Borrow(borrowed)) {} - - ~MaybeOwningShapePtr() { MaybeDeleteOwned(); } - - const Shape* get() const { - return reinterpret_cast(ptr_and_owning_bit_ & kPointerMask); - } - Shape* get_mutable(bool ensure_owned = false) { - const Shape* const_ptr = get(); - // TODO(b/67651157): Remove this copy on write logic and combine get() and - // get_mutable() once we remove mutable_shape_do_not_use(). - if (const_ptr && !OwnsPtr()) { - ptr_and_owning_bit_ = TakeUnique(std::make_unique(*const_ptr)); - const_ptr = get(); - } - DCHECK(OwnsPtr()); - return const_cast(const_ptr); - } - const Shape* operator->() const { return get(); } - const Shape& operator*() const { return *get(); } - - MaybeOwningShapePtr& operator=(std::unique_ptr unique) { - MaybeDeleteOwned(); - ptr_and_owning_bit_ = TakeUnique(std::move(std::move(unique))); - return *this; - } - - MaybeOwningShapePtr& operator=(const Shape* borrowed) { - MaybeDeleteOwned(); - ptr_and_owning_bit_ = Borrow(borrowed); - return *this; - } - - MaybeOwningShapePtr& operator=(MaybeOwningShapePtr&& other) { - using std::swap; - swap(ptr_and_owning_bit_, other.ptr_and_owning_bit_); - return *this; - } - - MaybeOwningShapePtr(const MaybeOwningShapePtr&) = delete; - MaybeOwningShapePtr(MaybeOwningShapePtr&& other) - : ptr_and_owning_bit_(other.ptr_and_owning_bit_) { - other.ptr_and_owning_bit_ = 0; - } - - MaybeOwningShapePtr Clone() const { - const Shape* ptr = get(); - if (ptr && OwnsPtr()) { - return MaybeOwningShapePtr(std::make_unique(*ptr)); - } - return MaybeOwningShapePtr(ptr); - } - - private: - enum : uint64_t { - kOwningBitMask = 1UL, - kPointerMask = ~kOwningBitMask, - }; - static intptr_t TakeUnique(std::unique_ptr unique) { - Shape* released = unique.release(); - DCHECK_EQ(reinterpret_cast(released) & kOwningBitMask, 0); - return reinterpret_cast(released) | kOwningBitMask; - } - - static intptr_t Borrow(const Shape* borrowed) { - DCHECK_EQ(reinterpret_cast(borrowed) & kOwningBitMask, 0); - return reinterpret_cast(borrowed); - } - - bool OwnsPtr() const { return kOwningBitMask & ptr_and_owning_bit_; } - - void MaybeDeleteOwned() { - if (OwnsPtr()) { - delete get(); - } - } - - intptr_t ptr_and_owning_bit_ = 0; - }; + using MaybeOwningShapePtr = MaybeOwning; // The parent class borrows this shape. MaybeOwningShapePtr shape_; diff --git a/third_party/xla/xla/maybe_owning.h b/third_party/xla/xla/maybe_owning.h new file mode 100644 index 00000000000000..4f32472ecb2f95 --- /dev/null +++ b/third_party/xla/xla/maybe_owning.h @@ -0,0 +1,107 @@ +/* Copyright 2024 The OpenXLA Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef XLA_MAYBE_OWNING_H_
+#define XLA_MAYBE_OWNING_H_
+
+#include <cstdint>
+#include <memory>
+
+// A unique_ptr like class which may or may not have ownership of its pointer.
+// Uses least significant bit of the pointer to indicate ownership.
+template <typename T>
+class MaybeOwning final {
+ public:
+  MaybeOwning() = default;
+  explicit MaybeOwning(std::unique_ptr<T> unique)
+      : ptr_and_owning_bit_(TakeUnique(std::move(unique))) {}
+
+  explicit MaybeOwning(const T* borrowed)
+      : ptr_and_owning_bit_(Borrow(borrowed)) {}
+
+  ~MaybeOwning() { MaybeDeleteOwned(); }
+
+  const T* get() const { return RemoveMask(); }
+
+  T* get_mutable() { return RemoveMask(); }
+
+  const T* operator->() const { return get(); }
+  const T& operator*() const { return *get(); }
+
+  MaybeOwning& operator=(std::unique_ptr<T> unique) {
+    MaybeDeleteOwned();
+    ptr_and_owning_bit_ = TakeUnique(std::move(unique));
+    return *this;
+  }
+
+  MaybeOwning& operator=(const T* borrowed) {
+    MaybeDeleteOwned();
+    ptr_and_owning_bit_ = Borrow(borrowed);
+    return *this;
+  }
+
+  MaybeOwning& operator=(MaybeOwning&& other) {
+    using std::swap;
+    swap(ptr_and_owning_bit_, other.ptr_and_owning_bit_);
+    return *this;
+  }
+
+  MaybeOwning(const MaybeOwning&) = delete;
+  MaybeOwning(MaybeOwning&& other)
+      : ptr_and_owning_bit_(other.ptr_and_owning_bit_) {
+    other.ptr_and_owning_bit_ = 0;
+  }
+
+  MaybeOwning Clone() const {
+    const T* ptr = get();
+    if (ptr && OwnsPtr()) {
+      return MaybeOwning(std::make_unique<T>(*ptr));
+    }
+    return MaybeOwning(ptr);
+  }
+
+  bool OwnsPtr() const { return kOwningBitMask & ptr_and_owning_bit_; }
+
+ private:
+  enum : uint64_t {
+    kOwningBitMask = 1UL,
+    kPointerMask = ~kOwningBitMask,
+  };
+
+  T* RemoveMask() const {
+    return reinterpret_cast<T*>(ptr_and_owning_bit_ & kPointerMask);
+  }
+
+  static intptr_t TakeUnique(std::unique_ptr<T> unique) {
+    T* released = unique.release();
+    DCHECK_EQ(reinterpret_cast<intptr_t>(released) & kOwningBitMask, 0);
+    return reinterpret_cast<intptr_t>(released) | kOwningBitMask;
+  }
+
+  static intptr_t Borrow(const T* borrowed) {
+    DCHECK_EQ(reinterpret_cast<intptr_t>(borrowed) & kOwningBitMask, 0);
+    return reinterpret_cast<intptr_t>(borrowed);
+  }
+
+  void MaybeDeleteOwned() {
+    if (OwnsPtr()) {
+      delete get();
+    }
+  }
+
+  intptr_t ptr_and_owning_bit_ = 0;
+};
+
+#endif  // XLA_MAYBE_OWNING_H_
diff --git a/third_party/xla/xla/service/gpu/gpu_compiler.cc b/third_party/xla/xla/service/gpu/gpu_compiler.cc
index 02673628e26980..8fc3b97cee7654 100644
--- a/third_party/xla/xla/service/gpu/gpu_compiler.cc
+++ b/third_party/xla/xla/service/gpu/gpu_compiler.cc
@@ -65,6 +65,7 @@ limitations under the License.
 #include "xla/hlo/ir/hlo_opcode.h"
 #include "xla/hlo/ir/hlo_schedule.h"
 #include "xla/hlo/transforms/hlo_constant_splitter.h"
+#include "xla/maybe_owning.h"
 #include "xla/service/all_gather_broadcast_reorder.h"
 #include "xla/service/all_gather_combiner.h"
 #include "xla/service/all_reduce_combiner.h"
@@ -259,38 +260,10 @@ limitations under the License.
namespace xla { namespace gpu { namespace { -// A class for storing either an owned thread pool or a non-owning pointer to an -// external thread pool. -class MaybeOwningThreadPool { - public: - // Gets or creates a thread pool. - // - // See the code for the logic. - static MaybeOwningThreadPool GetOrCreate( - int parallelism, tsl::thread::ThreadPool* default_thread_pool, - int default_parallelism); - - // Not owning (nullptr). - MaybeOwningThreadPool(); - // Not owning. - explicit MaybeOwningThreadPool(tsl::thread::ThreadPool* thread_pool); - // Owning. - explicit MaybeOwningThreadPool( - std::unique_ptr thread_pool); - tsl::thread::ThreadPool* get(); - const tsl::thread::ThreadPool* get() const; - tsl::thread::ThreadPool* operator->(); - const tsl::thread::ThreadPool* operator->() const; - explicit operator bool() const; - bool operator!() const; - private: - std::variant> - thread_pool_; -}; +using MaybeOwningThreadPool = MaybeOwning; -/*static*/ MaybeOwningThreadPool MaybeOwningThreadPool::GetOrCreate( +MaybeOwningThreadPool CreateMaybeOwningThreadPool( int parallelism, tsl::thread::ThreadPool* default_thread_pool, int default_parallelism) { CHECK_GE(parallelism, 0); @@ -320,41 +293,6 @@ class MaybeOwningThreadPool { } } -MaybeOwningThreadPool::MaybeOwningThreadPool() : thread_pool_(nullptr) {} - -MaybeOwningThreadPool::MaybeOwningThreadPool( - tsl::thread::ThreadPool* thread_pool) - : thread_pool_(thread_pool) {} - -MaybeOwningThreadPool::MaybeOwningThreadPool( - std::unique_ptr thread_pool) - : thread_pool_(std::move(thread_pool)) {} - -tsl::thread::ThreadPool* MaybeOwningThreadPool::get() { - if (std::holds_alternative(thread_pool_)) { - return std::get(thread_pool_); - } - return std::get>(thread_pool_).get(); -} - -const tsl::thread::ThreadPool* MaybeOwningThreadPool::get() const { - return const_cast(this)->get(); -} - -tsl::thread::ThreadPool* MaybeOwningThreadPool::operator->() { - tsl::thread::ThreadPool* thread_pool = get(); - CHECK_NE(thread_pool, nullptr); - return thread_pool; -} - -const tsl::thread::ThreadPool* MaybeOwningThreadPool::operator->() const { - return const_cast(this)->operator->(); -} - -MaybeOwningThreadPool::operator bool() const { return get() != nullptr; } - -bool MaybeOwningThreadPool::operator!() const { return get() == nullptr; } - absl::StatusOr GetAutotuneConfig( se::StreamExecutor* stream_exec, const DebugOptions& debug_options, const GpuCompiler::CompileOptions& options, @@ -1230,7 +1168,7 @@ absl::Status GpuCompiler::OptimizeHloModule( CheckNotScheduled(hlo_module); LogDebugOptions(hlo_module); - MaybeOwningThreadPool thread_pool = MaybeOwningThreadPool::GetOrCreate( + MaybeOwningThreadPool thread_pool = CreateMaybeOwningThreadPool( /*parallelism=*/hlo_module->config() .debug_options() .xla_gpu_force_compilation_parallelism(), @@ -1300,7 +1238,8 @@ absl::Status GpuCompiler::OptimizeHloModule( TF_RETURN_IF_ERROR(layout_normalization_pipeline.Run(hlo_module).status()); // Run target-specific HLO optimization passes after layout assignment. TF_RETURN_IF_ERROR(OptimizeHloPostLayoutAssignment( - hlo_module, stream_exec, options, gpu_target_config, thread_pool.get())); + hlo_module, stream_exec, options, gpu_target_config, + thread_pool.get_mutable())); // This is a "low effort, high impact" fusion that should be run first. 
if (hlo_module->config() @@ -1314,7 +1253,7 @@ absl::Status GpuCompiler::OptimizeHloModule( } TF_RETURN_IF_ERROR(RunFusionPasses(hlo_module, gpu_target_config, - thread_pool.get(), + thread_pool.get_mutable(), ShapeSizeBytesFunction())); TF_RETURN_IF_ERROR(RunPostFusionPasses( hlo_module, @@ -1831,7 +1770,7 @@ GpuCompiler::CompileToTargetBinary(const HloModuleConfig& module_config, MaybeOwningThreadPool thread_pool = module_config.debug_options() .xla_gpu_enable_llvm_module_compilation_parallelism() - ? MaybeOwningThreadPool::GetOrCreate( + ? CreateMaybeOwningThreadPool( /*parallelism=*/module_config.debug_options() .xla_gpu_force_compilation_parallelism(), /*default_thread_pool=*/options.thread_pool, @@ -1845,7 +1784,7 @@ GpuCompiler::CompileToTargetBinary(const HloModuleConfig& module_config, // Disable multi-threading during deviceless AOT compilation. // TODO(anlunx): Enable multi-threading once deviceless AOT compilation is // enabled. - if (!can_use_link_modules || !thread_pool || !stream_exec) { + if (!can_use_link_modules || !thread_pool.get() || !stream_exec) { return CompileSingleModule(module_config, gpu_version, debug_module, llvm_module, /*relocatable=*/false, options, @@ -1904,19 +1843,19 @@ GpuCompiler::CompileToTargetBinary(const HloModuleConfig& module_config, llvm_modules.size()); tsl::BlockingCounter counter(llvm_modules.size()); for (int i = 0; i < llvm_modules.size(); i++) { - thread_pool->Schedule([&compile_results, i, &llvm_modules, &counter, this, - &module_config, &gpu_version, &debug_module, - &options] { - // Each thread has its own context to avoid race conditions. - llvm::LLVMContext new_context; - std::unique_ptr new_module = - CopyToContext(*llvm_modules.at(i), new_context); - compile_results.at(i) = CompileSingleModule( - module_config, gpu_version, debug_module, new_module.get(), - /*relocatable=*/true, options, - /*shard_number=*/i); - counter.DecrementCount(); - }); + thread_pool.get_mutable()->Schedule( + [&compile_results, i, &llvm_modules, &counter, this, &module_config, + &gpu_version, &debug_module, &options] { + // Each thread has its own context to avoid race conditions. + llvm::LLVMContext new_context; + std::unique_ptr new_module = + CopyToContext(*llvm_modules.at(i), new_context); + compile_results.at(i) = CompileSingleModule( + module_config, gpu_version, debug_module, new_module.get(), + /*relocatable=*/true, options, + /*shard_number=*/i); + counter.DecrementCount(); + }); } counter.Wait(); diff --git a/third_party/xla/xla/util_test.cc b/third_party/xla/xla/util_test.cc index 052d8cd2fe7eae..8e442edd68254c 100644 --- a/third_party/xla/xla/util_test.cc +++ b/third_party/xla/xla/util_test.cc @@ -19,6 +19,7 @@ limitations under the License. #include #include #include +#include #include #include #include @@ -26,6 +27,7 @@ limitations under the License. 
#include #include +#include "xla/maybe_owning.h" #include "xla/test.h" #include "xla/types.h" #include "tsl/platform/logging.h" @@ -318,5 +320,28 @@ TEST(UtilTest, PackInt4) { } } +TEST(UtilTest, MaybeOwningTestNull) { + MaybeOwning m(nullptr); + EXPECT_EQ(m.get(), nullptr); + EXPECT_EQ(m.get_mutable(), nullptr); +} + +TEST(UtilTest, MaybeOwningTestOwning) { + MaybeOwning m(std::make_unique()); + *m.get_mutable() = 'a'; + EXPECT_EQ(*m, 'a'); +} + +TEST(UtilTest, MaybeOwningTestShared) { + auto owner = std::make_unique(); + *owner = 'x'; + MaybeOwning c1(owner.get()); + MaybeOwning c2(owner.get()); + + EXPECT_EQ(*c1, 'x'); + EXPECT_EQ(*c2, 'x'); + EXPECT_EQ(c1.get(), c2.get()); +} + } // namespace } // namespace xla From 06490c12157d7fa5ada4e529c83a3b3333be7602 Mon Sep 17 00:00:00 2001 From: Kyle Lucke Date: Mon, 13 May 2024 10:49:31 -0700 Subject: [PATCH 034/478] Remove unused constant from stream_executor_pimpl.h. PiperOrigin-RevId: 633263139 --- third_party/xla/xla/stream_executor/stream_executor_pimpl.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/third_party/xla/xla/stream_executor/stream_executor_pimpl.h b/third_party/xla/xla/stream_executor/stream_executor_pimpl.h index a75c23f6348b0a..f57ace700cc9a0 100644 --- a/third_party/xla/xla/stream_executor/stream_executor_pimpl.h +++ b/third_party/xla/xla/stream_executor/stream_executor_pimpl.h @@ -87,10 +87,6 @@ class StreamExecutor : public StreamExecutorInterface { mutable std::unique_ptr device_description_ ABSL_GUARDED_BY(mu_); - // Only one worker thread is needed; little work will be done by the - // executor. - static constexpr int kNumBackgroundThreads = 1; - // Memory limit in bytes. Value less or equal to 0 indicates there is no // limit. int64_t memory_limit_bytes_; From baa2cd805d9bb3a748a35f4a02eedd82d1d55056 Mon Sep 17 00:00:00 2001 From: David Dunleavy Date: Mon, 13 May 2024 10:51:53 -0700 Subject: [PATCH 035/478] Teach `xla_test` about the interpreter backend, fail on unknown backends PiperOrigin-RevId: 633263903 --- third_party/xla/xla/tests/build_defs.bzl | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/third_party/xla/xla/tests/build_defs.bzl b/third_party/xla/xla/tests/build_defs.bzl index ae74353e783306..ef4570ed47c18e 100644 --- a/third_party/xla/xla/tests/build_defs.bzl +++ b/third_party/xla/xla/tests/build_defs.bzl @@ -29,7 +29,7 @@ GPU_DEFAULT_BACKENDS = [ _DEFAULT_BACKENDS = ["cpu"] + GPU_DEFAULT_BACKENDS -_ALL_BACKENDS = ["cpu"] + GPU_BACKENDS + list(plugins.keys()) +_ALL_BACKENDS = ["cpu", "interpreter"] + GPU_BACKENDS + list(plugins.keys()) # buildifier: disable=function-docstring def prepare_gpu_backend_data(backends, disabled_backends, backend_tags, backend_args): @@ -199,6 +199,11 @@ def xla_test( ]) this_backend_tags += tf_gpu_tests_tags() this_backend_copts.append("-DXLA_TEST_BACKEND_GPU=1") + elif backend == "interpreter": + backend_deps += [ + "//xla/service:interpreter_plugin", + "//xla/tests:test_macros_interpreter", + ] elif backend in plugins: backend_deps += plugins[backend]["deps"] this_backend_copts += plugins[backend]["copts"] @@ -206,8 +211,7 @@ def xla_test( this_backend_args += plugins[backend]["args"] this_backend_data += plugins[backend]["data"] else: - # Ignore unknown backends. TODO(b/289028518): Change back to fail. 
- continue + fail("Unknown backend %s" % backend) if xla_test_library_deps: for lib_dep in xla_test_library_deps: @@ -288,6 +292,8 @@ def xla_test_library( backend_deps = ["//xla/tests:test_macros_cpu"] elif backend in GPU_BACKENDS: backend_deps = ["//xla/tests:test_macros_%s" % backend] + elif backend == "interpreter": + backend_deps = ["//xla/tests:test_macros_interpreter"] elif backend in plugins: backend_deps = plugins[backend]["deps"] this_backend_copts += plugins[backend]["copts"] From d90ec6a2b507ea3fd12e5b35c3484ea9ae0d826a Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Mon, 13 May 2024 10:55:23 -0700 Subject: [PATCH 036/478] [XLA] [NFC] Remove unused var PiperOrigin-RevId: 633265210 --- third_party/xla/xla/service/gpu/gpu_compiler.cc | 4 ---- 1 file changed, 4 deletions(-) diff --git a/third_party/xla/xla/service/gpu/gpu_compiler.cc b/third_party/xla/xla/service/gpu/gpu_compiler.cc index 8fc3b97cee7654..297eb7b98fdb71 100644 --- a/third_party/xla/xla/service/gpu/gpu_compiler.cc +++ b/third_party/xla/xla/service/gpu/gpu_compiler.cc @@ -421,10 +421,6 @@ GpuThunkAotCompilationResult::LoadExecutable( TF_ASSIGN_OR_RETURN(auto output_info, GetOutputInfo(*hlo_module, *buffer_assignment)); const Shape& output_shape = hlo_module->result_shape(); - std::function buffer_assignment_dumper = [] { - return std::string(); - }; - int64_t debug_buffer_assignment_show_max = hlo_module->config() .debug_options() From 3a61f9eee0024ee36d2a4be6026e339bde837011 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 May 2024 11:13:32 -0700 Subject: [PATCH 037/478] Add an option (set to false by default, for now) and support for adding strategies for dot operators that trigger windowed einsum. PiperOrigin-RevId: 633271761 --- .../auto_sharding_dot_handler.cc | 180 +++++++++++++++++- .../auto_sharding/auto_sharding_option.cc | 3 + .../auto_sharding/auto_sharding_option.h | 6 + 3 files changed, 188 insertions(+), 1 deletion(-) diff --git a/third_party/xla/xla/hlo/experimental/auto_sharding/auto_sharding_dot_handler.cc b/third_party/xla/xla/hlo/experimental/auto_sharding/auto_sharding_dot_handler.cc index 84c22df6fa0746..8a8bf39312fd5a 100644 --- a/third_party/xla/xla/hlo/experimental/auto_sharding/auto_sharding_dot_handler.cc +++ b/third_party/xla/xla/hlo/experimental/auto_sharding/auto_sharding_dot_handler.cc @@ -23,8 +23,11 @@ limitations under the License. #include #include "absl/algorithm/container.h" +#include "absl/container/flat_hash_set.h" #include "absl/log/check.h" #include "absl/log/log.h" +#include "absl/strings/match.h" +#include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" #include "absl/strings/str_join.h" #include "absl/types/span.h" @@ -87,6 +90,8 @@ class HandlerBase { lhs_(ins->operand(0)), rhs_(ins->operand(1)) {} + virtual ~HandlerBase() = default; + void AppendNewStrategy(const std::string& name, const HloSharding& output_spec, absl::Span input_specs, @@ -113,6 +118,34 @@ class HandlerBase { const std::optional>& communication_cost_fn = std::nullopt); + // Given lhs and rhs dim maps, infers a sharding for the output by relying on + // the sharding_propagation pass. 
+ void MaybeAppendInternal( + const std::string& name, const DimMap& lhs_dim_map, + const DimMap& rhs_dim_map, + const std::optional& expected_output_dim_map, + const Array& device_mesh, double compute_cost = 0, + const std::optional>& + communication_cost_fn = std::nullopt); + + // Given an existing (non-allreduce) sharding candidate, generate a + // corresponding candidate by additionally sharding (if possible) the passed + // in operand, such that, the generated candidate can trigger all-gather + // windowed einsum during partitioning. + virtual void AppendAllGatherWindowedEinsumStrategyForOperand( + int operand_num, const std::string& name, const DimMap& lhs_dim_map, + const DimMap& rhs_dim_map, const DimMap& output_dim_map, + const Array& device_mesh, double compute_cost) {} + + // Given an existing (allreduce) sharding candidate, generate a corresponding + // candidate by additionally sharding (if possible) the dot/conv output, such + // that, the generated candidate can trigger reduce-scatter windowed einsum + // during partitioning. + virtual void AppendReduceScatterWindowedEinsumStrategy( + const std::string& name, const DimMap& lhs_dim_map, + const DimMap& rhs_dim_map, const DimMap& output_dim_map, + const Array& device_mesh, double compute_cost) {} + std::optional GetShardingFromUser(const HloSharding& lhs_spec, const HloSharding& rhs_spec); @@ -177,6 +210,8 @@ class DotHandler : public HandlerBase { const InstructionBatchDimMap& batch_map, const AutoShardingOption& option, const CallGraph& call_graph); + ~DotHandler() override = default; + void SplitLhsSpaceRhsSpace(); void SplitLhsSpaceOnly(); @@ -205,6 +240,16 @@ class DotHandler : public HandlerBase { void Add1DBatchSplit(); + void AppendAllGatherWindowedEinsumStrategyForOperand( + int operand_num, const std::string& name, const DimMap& lhs_dim_map, + const DimMap& rhs_dim_map, const DimMap& output_dim_map, + const Array& device_mesh, double compute_cost) override; + + void AppendReduceScatterWindowedEinsumStrategy( + const std::string& name, const DimMap& lhs_dim_map, + const DimMap& rhs_dim_map, const DimMap& output_dim_map, + const Array& device_mesh, double compute_cost) override; + Status RegisterStrategies(); // Dimension information @@ -228,6 +273,8 @@ class ConvHandler : public HandlerBase { const InstructionBatchDimMap& batch_map, const AutoShardingOption& option, const CallGraph& call_graph); + ~ConvHandler() override = default; + void SplitLhsBatchRhsOutchannel(); void SplitLhsBatchBothInchannel(); @@ -287,7 +334,7 @@ void HandlerBase::AppendNewStrategy(const std::string& name, // TODO(b/309638633) As we build more confidence in this, we should remove // this expected_output_dim_map argument and fully rely on sharding // propagation. 
-void HandlerBase::MaybeAppend( +void HandlerBase::MaybeAppendInternal( const std::string& name, const DimMap& lhs_dim_map, const DimMap& rhs_dim_map, const std::optional& expected_output_dim_map, @@ -336,6 +383,35 @@ void HandlerBase::MaybeAppend( communication_cost); } +void HandlerBase::MaybeAppend( + const std::string& name, const DimMap& lhs_dim_map, + const DimMap& rhs_dim_map, + const std::optional& expected_output_dim_map, + const Array& device_mesh, double compute_cost, + const std::optional>& + communication_cost_fn) { + MaybeAppendInternal(name, lhs_dim_map, rhs_dim_map, expected_output_dim_map, + device_mesh, compute_cost, communication_cost_fn); + if (!option_.generate_windowed_einsum_strategies || + !expected_output_dim_map.has_value()) { + return; + } + if (absl::StrContains(name, "allreduce")) { + CHECK(communication_cost_fn.has_value()); + AppendReduceScatterWindowedEinsumStrategy(name, lhs_dim_map, rhs_dim_map, + *expected_output_dim_map, + device_mesh, compute_cost); + } else { + CHECK(!communication_cost_fn.has_value()); + AppendAllGatherWindowedEinsumStrategyForOperand( + 0, name, lhs_dim_map, rhs_dim_map, *expected_output_dim_map, + device_mesh, compute_cost); + AppendAllGatherWindowedEinsumStrategyForOperand( + 1, name, lhs_dim_map, rhs_dim_map, *expected_output_dim_map, + device_mesh, compute_cost); + } +} + std::optional HandlerBase::GetShardingFromUser( const HloSharding& lhs_spec, const HloSharding& rhs_spec) { std::unique_ptr ins_clone = ins_->Clone(); @@ -771,6 +847,108 @@ void DotHandler::Add1DBatchSplit() { } } +void DotHandler::AppendAllGatherWindowedEinsumStrategyForOperand( + int operand_num, const std::string& name, const DimMap& lhs_dim_map, + const DimMap& rhs_dim_map, const DimMap& output_dim_map, + const Array& device_mesh, double compute_cost) { + const HloInstruction* operand = ins_->operand(operand_num); + const DimMap& operand_dim_map = operand_num == 0 ? lhs_dim_map : rhs_dim_map; + absl::flat_hash_set sharded_tensor_dims; + absl::flat_hash_set used_mesh_dims; + for (const auto [tensor_dim, mesh_dim] : operand_dim_map) { + if (device_mesh.dim(mesh_dim) == 1) { + continue; + } + sharded_tensor_dims.insert(tensor_dim); + used_mesh_dims.insert(mesh_dim); + } + if (used_mesh_dims.size() == device_mesh_.num_dimensions() || + sharded_tensor_dims.size() == operand->shape().rank()) { + return; + } + + for (int64_t tensor_dim = 0; tensor_dim < operand->shape().rank(); + ++tensor_dim) { + if (sharded_tensor_dims.contains(tensor_dim)) { + continue; + } + for (int64_t mesh_dim = 0; mesh_dim < device_mesh_.num_dimensions(); + ++mesh_dim) { + if (used_mesh_dims.contains(mesh_dim) || + (device_mesh.dim(mesh_dim) == 1)) { + continue; + } + DimMap further_sharded_dim_map = operand_dim_map; + further_sharded_dim_map[tensor_dim] = mesh_dim; + + auto updated_communication_cost_fn = + [](const HloSharding& output_sharding) -> double { + // TODO(331684721): Model costs for windowed einsum + return 100.0; + }; + + std::string updated_name = + absl::StrCat(absl::StrFormat("WindowedEinsum @ {%d,%d,%d}", + operand_num, tensor_dim, mesh_dim), + name); + MaybeAppendInternal( + updated_name, + operand_num == 0 ? further_sharded_dim_map : lhs_dim_map, + operand_num == 1 ? 
further_sharded_dim_map : rhs_dim_map, + output_dim_map, device_mesh, compute_cost, + updated_communication_cost_fn); + } + } +} + +void DotHandler::AppendReduceScatterWindowedEinsumStrategy( + const std::string& name, const DimMap& lhs_dim_map, + const DimMap& rhs_dim_map, const DimMap& output_dim_map, + const Array& device_mesh, double compute_cost) { + absl::flat_hash_set sharded_tensor_dims; + absl::flat_hash_set used_mesh_dims; + for (const auto [tensor_dim, mesh_dim] : output_dim_map) { + if (device_mesh.dim(mesh_dim) == 1) { + continue; + } + sharded_tensor_dims.insert(tensor_dim); + used_mesh_dims.insert(mesh_dim); + } + if (used_mesh_dims.size() == device_mesh_.num_dimensions() || + sharded_tensor_dims.size() == ins_->shape().rank()) { + return; + } + + for (int64_t tensor_dim = 0; tensor_dim < ins_->shape().rank(); + ++tensor_dim) { + if (sharded_tensor_dims.contains(tensor_dim)) { + continue; + } + for (int64_t mesh_dim = 0; mesh_dim < device_mesh_.num_dimensions(); + ++mesh_dim) { + if (used_mesh_dims.contains(mesh_dim) || + (device_mesh.dim(mesh_dim) == 1)) { + continue; + } + DimMap further_sharded_dim_map = output_dim_map; + further_sharded_dim_map[tensor_dim] = mesh_dim; + + auto updated_communication_cost_fn = + [](const HloSharding& output_sharding) -> double { + // TODO(331684721): Model costs for windowed einsum + return 100.0; + }; + + std::string updated_name = absl::StrCat( + absl::StrFormat("WindowedEinsum @ {%d,%d}", tensor_dim, mesh_dim), + name); + MaybeAppendInternal(updated_name, lhs_dim_map, rhs_dim_map, + further_sharded_dim_map, device_mesh, compute_cost, + updated_communication_cost_fn); + } + } +} + Status DotHandler::RegisterStrategies() { // SS = SR x RS // Split lhs space dim and rhs space dim. diff --git a/third_party/xla/xla/hlo/experimental/auto_sharding/auto_sharding_option.cc b/third_party/xla/xla/hlo/experimental/auto_sharding/auto_sharding_option.cc index e9648436047dd7..857110d78483d9 100644 --- a/third_party/xla/xla/hlo/experimental/auto_sharding/auto_sharding_option.cc +++ b/third_party/xla/xla/hlo/experimental/auto_sharding/auto_sharding_option.cc @@ -136,6 +136,9 @@ std::string AutoShardingOption::ToString() const { lines.push_back(absl::StrCat("model_resharding_memory_costs: ", model_resharding_memory_costs)); + lines.push_back(absl::StrCat("generate_windowed_einsum_strategies: ", + generate_windowed_einsum_strategies)); + return absl::StrJoin(lines, "\n"); } diff --git a/third_party/xla/xla/hlo/experimental/auto_sharding/auto_sharding_option.h b/third_party/xla/xla/hlo/experimental/auto_sharding/auto_sharding_option.h index a858ecfce1d4da..2983d5ac9d7999 100644 --- a/third_party/xla/xla/hlo/experimental/auto_sharding/auto_sharding_option.h +++ b/third_party/xla/xla/hlo/experimental/auto_sharding/auto_sharding_option.h @@ -191,6 +191,12 @@ struct AutoShardingOption { // for resharding edges. bool model_resharding_memory_costs = true; + // Whether or not to generate strategies that model the windowed einsum (or + // collective matmul) optimization + // TODO(331684721,329508561): Generate windowed-einsum strategies by default + // once it is fully implemented. + bool generate_windowed_einsum_strategies = false; + // Prints a debug string. 
std::string ToString() const; From 3cefc36d7cb4b065a8ebe87c11c9577b9f49bdd5 Mon Sep 17 00:00:00 2001 From: Junwhan Ahn Date: Mon, 13 May 2024 11:17:14 -0700 Subject: [PATCH 038/478] Implement basic memory space support for PjRt CPU This CL implements minimal memory support for PjRt CPU so that memory space APIs can be used consistently across device types. Like GPU, each CPU device for now has just one memory space kind (`device`) and all memory space flavors of transfer APIs redirect the calls to the corresponding versions that take a device instead, assuming that there's only one device to which each memory space is attached. PiperOrigin-RevId: 633272982 --- third_party/xla/xla/pjrt/cpu/BUILD | 3 + third_party/xla/xla/pjrt/cpu/cpu_client.cc | 131 ++++++++++++++++-- third_party/xla/xla/pjrt/cpu/cpu_client.h | 45 +++++- .../xla/xla/pjrt/cpu/cpu_client_test.cc | 20 +++ 4 files changed, 180 insertions(+), 19 deletions(-) diff --git a/third_party/xla/xla/pjrt/cpu/BUILD b/third_party/xla/xla/pjrt/cpu/BUILD index fdc4acf62abae4..a06396197fd617 100644 --- a/third_party/xla/xla/pjrt/cpu/BUILD +++ b/third_party/xla/xla/pjrt/cpu/BUILD @@ -154,6 +154,7 @@ cc_library( "//xla/client:xla_computation", "//xla/hlo/ir:hlo", "//xla/pjrt:compile_options_proto_cc", + "//xla/pjrt:host_memory_spaces", "//xla/pjrt:mlir_to_hlo", "//xla/pjrt:pjrt_client", "//xla/pjrt:pjrt_common", @@ -186,6 +187,7 @@ cc_library( "//xla/service/cpu:simple_orc_jit", "//xla/tsl/concurrency:async_value", "//xla/tsl/concurrency:ref_count", + "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/base:dynamic_annotations", "@com_google_absl//absl/container:flat_hash_map", @@ -229,6 +231,7 @@ xla_cc_test( "//xla:util", "//xla/ffi", "//xla/ffi:ffi_api", + "//xla/pjrt:host_memory_spaces", "//xla/service:hlo_parser", "//xla/tests:test_utils", "@com_google_absl//absl/status", diff --git a/third_party/xla/xla/pjrt/cpu/cpu_client.cc b/third_party/xla/xla/pjrt/cpu/cpu_client.cc index b0b63a257c7fdd..3746dd4fd1c3d5 100644 --- a/third_party/xla/xla/pjrt/cpu/cpu_client.cc +++ b/third_party/xla/xla/pjrt/cpu/cpu_client.cc @@ -27,7 +27,9 @@ limitations under the License. 
#include #include +#include "absl/algorithm/container.h" #include "xla/pjrt/cpu/cpu_topology.h" +#include "xla/pjrt/host_memory_spaces.h" #include "xla/pjrt/pjrt_compiler.h" #define EIGEN_USE_THREADS @@ -123,7 +125,8 @@ absl::StatusOr> AllocateDestinationBuffer( AbstractTfrtCpuBuffer::AllocateTrackedDeviceBuffer( on_device_shape, std::move(definition_events))); return std::make_unique( - on_device_shape, std::move(tracked_device_buffer), client, device); + on_device_shape, std::move(tracked_device_buffer), client, device, + *device->default_memory_space()); } absl::StatusOr> AllocateDestinationBufferAndAvs( @@ -309,12 +312,47 @@ Status TfrtCpuDevice::TransferFromOutfeed(MutableBorrowingLiteral literal) { return TransferLiteralFromOutfeedOnCpu(local_hardware_id(), literal); } +void TfrtCpuDevice::AttachMemorySpace(PjRtMemorySpace* memory_space) { + CHECK(memory_space != nullptr); + CHECK(client_ == memory_space->client()) << absl::StrFormat( + "Could not attach a TfrtCpuDevice to a PjRtMemorySpace owned by a " + "different client, the device's client: %s, the memory space's client: " + "%s.", + client_->platform_name(), memory_space->client()->platform_name()); + + memory_spaces_.push_back(memory_space); + memory_spaces_by_id_.emplace(memory_space->kind_id(), memory_space); +} + absl::Span TfrtCpuDevice::memory_spaces() const { - return {}; + return memory_spaces_; } absl::StatusOr TfrtCpuDevice::default_memory_space() const { - return Unimplemented("default_memory_space is not supported"); + return memory_space_by_kind_id(UnpinnedHostMemorySpace::kKindId); +} + +absl::StatusOr TfrtCpuDevice::memory_space_by_kind( + absl::string_view memory_space_kind) const { + auto it = + absl::c_find_if(memory_spaces_, [memory_space_kind](PjRtMemorySpace* ms) { + return ms->kind() == memory_space_kind; + }); + if (it != memory_spaces_.end()) { + return *it; + } + return absl::InternalError( + absl::StrCat("No memory space found (kind: ", memory_space_kind, ")")); +} + +absl::StatusOr TfrtCpuDevice::memory_space_by_kind_id( + int id) const { + auto it = memory_spaces_by_id_.find(id); + if (it == memory_spaces_by_id_.end()) { + return absl::InternalError( + absl::StrCat("No memory space found (kind_id: ", id, ")")); + } + return it->second; } static int CpuDeviceCount() { @@ -417,8 +455,19 @@ TfrtCpuClient::TfrtCpuClient( } } for (int idx = 0; idx < addressable_devices_.size(); ++idx) { - CHECK(addressable_devices_[idx] != nullptr) << idx; + auto* const device = addressable_devices_[idx]; + CHECK(device != nullptr) << idx; + + // Use the device id to construct a globally unique memory space id. We + // do not promise that memory space ids and device ids are the same. 
+ const int id = device->id(); + auto memory_space = std::make_unique(id, device); + tensorflow::down_cast(device)->AttachMemorySpace( + memory_space.get()); + memory_spaces_.push_back(memory_space.get()); + owned_memory_spaces_.push_back(std::move(memory_space)); } + LOG(INFO) << "TfrtCpuClient created."; } @@ -451,7 +500,7 @@ absl::StatusOr TfrtCpuClient::LookupAddressableDevice( } absl::Span TfrtCpuClient::memory_spaces() const { - return {}; + return memory_spaces_; } absl::StatusOr TfrtCpuClient::GetDefaultDeviceAssignment( @@ -844,7 +893,8 @@ TfrtCpuClient::CreateViewOfDeviceBuffer( std::move(on_delete_callback)); return std::unique_ptr(std::make_unique( shape, std::move(tracked_device_buffer), this, - tensorflow::down_cast(device))); + tensorflow::down_cast(device), + *device->default_memory_space())); } absl::StatusOr> TfrtCpuClient::CreateErrorBuffer( @@ -860,7 +910,8 @@ absl::StatusOr> TfrtCpuClient::CreateErrorBuffer( absl::InlinedVector, 4>{ tsl::AsyncValueRef( tsl::MakeErrorAsyncValueRef(std::move(error)))}), - this, tensorflow::down_cast(device)); + this, tensorflow::down_cast(device), + *device->default_memory_space()); } absl::StatusOr> TfrtCpuClient::CreateErrorBuffer( @@ -887,6 +938,13 @@ TfrtCpuClient::CreateBuffersForAsyncHostToDevice(absl::Span shapes, this); } +absl::StatusOr> +TfrtCpuClient::CreateBuffersForAsyncHostToDevice( + absl::Span shapes, PjRtMemorySpace* memory_space) { + CHECK_EQ(memory_space->devices().size(), 1); + return CreateBuffersForAsyncHostToDevice(shapes, memory_space->devices()[0]); +} + absl::StatusOr> TfrtCpuClient::BufferFromHostBuffer( const void* data, PrimitiveType type, absl::Span dims, std::optional> byte_strides, @@ -911,7 +969,38 @@ absl::StatusOr> TfrtCpuClient::BufferFromHostBuffer( return std::unique_ptr(std::make_unique( shape, std::move(tracked_device_buffer), this, - tensorflow::down_cast(device))); + tensorflow::down_cast(device), + *device->default_memory_space())); +} + +absl::StatusOr> TfrtCpuClient::BufferFromHostBuffer( + const void* data, PrimitiveType type, absl::Span dims, + std::optional> byte_strides, + HostBufferSemantics host_buffer_semantics, + absl::AnyInvocable on_done_with_host_buffer, PjRtDevice* device, + const Layout* device_layout) { + if (device_layout != nullptr) { + return absl::UnimplementedError(absl::StrCat( + "BufferFromHostBuffer with an optional device layout is not " + "implemented on platform: ", + platform_name())); + } + return BufferFromHostBuffer(data, type, dims, byte_strides, + host_buffer_semantics, + std::move(on_done_with_host_buffer), device); +} + +absl::StatusOr> TfrtCpuClient::BufferFromHostBuffer( + const void* data, PrimitiveType type, absl::Span dims, + std::optional> byte_strides, + HostBufferSemantics host_buffer_semantics, + absl::AnyInvocable on_done_with_host_buffer, + PjRtMemorySpace* memory_space, const Layout* device_layout) { + CHECK_EQ(memory_space->devices().size(), 1); + return BufferFromHostBuffer(data, type, dims, byte_strides, + host_buffer_semantics, + std::move(on_done_with_host_buffer), + memory_space->devices()[0], device_layout); } absl::StatusOr> @@ -934,14 +1023,22 @@ TfrtCpuClient::BufferFromHostLiteral(const LiteralSlice& literal, return std::unique_ptr(std::move(output_buffer)); } +absl::StatusOr> +TfrtCpuClient::BufferFromHostLiteral(const LiteralSlice& literal, + PjRtMemorySpace* memory_space) { + CHECK_EQ(memory_space->devices().size(), 1); + return BufferFromHostLiteral(literal, memory_space->devices()[0]); +} + TfrtCpuBuffer::TfrtCpuBuffer( Shape 
on_device_shape, std::unique_ptr tracked_device_buffer, - TfrtCpuClient* client, TfrtCpuDevice* device) + TfrtCpuClient* client, TfrtCpuDevice* device, PjRtMemorySpace* memory_space) : AbstractTfrtCpuBuffer(std::move(on_device_shape), std::move(tracked_device_buffer)), client_(client), - device_(device) {} + device_(device), + memory_space_(memory_space) {} static std::vector> CopyAsyncValues( absl::Span> events) { @@ -994,7 +1091,14 @@ absl::StatusOr> TfrtCpuBuffer::CopyToDevice( return std::unique_ptr(std::make_unique( on_device_shape_, std::move(tracked_device_buffer), client(), - tensorflow::down_cast(dst_device))); + tensorflow::down_cast(dst_device), + *dst_device->default_memory_space())); +} + +absl::StatusOr> TfrtCpuBuffer::CopyToMemorySpace( + PjRtMemorySpace* dst_memory_space) { + CHECK_EQ(dst_memory_space->devices().size(), 1); + return CopyToDevice(dst_memory_space->devices()[0]); } TfrtCpuExecutable::TfrtCpuExecutable( @@ -1580,7 +1684,7 @@ absl::StatusOr TfrtCpuExecutable::ExecuteHelper( std::move(definition_events)); auto leaf_buffer = std::make_unique( result_shape.tuple_shapes(i), std::move(leaf_tracked_device_buffer), - client_, device); + client_, device, *device->default_memory_space()); res.push_back(std::move(leaf_buffer)); } } else { @@ -1601,7 +1705,8 @@ absl::StatusOr TfrtCpuExecutable::ExecuteHelper( std::move(sub_buffers), std::move(sub_buffer_sizes), /*definition_event=*/execute_event); auto tfrt_output_buffer = std::make_unique( - result_shape, std::move(tracked_device_buffer), client_, device); + result_shape, std::move(tracked_device_buffer), client_, device, + *device->default_memory_space()); res.push_back(std::move(tfrt_output_buffer)); } diff --git a/third_party/xla/xla/pjrt/cpu/cpu_client.h b/third_party/xla/xla/pjrt/cpu/cpu_client.h index 4dad5dd3efb83f..554d7bb9c3b0d6 100644 --- a/third_party/xla/xla/pjrt/cpu/cpu_client.h +++ b/third_party/xla/xla/pjrt/cpu/cpu_client.h @@ -233,10 +233,17 @@ class TfrtCpuDevice final : public PjRtDevice { Status TransferFromOutfeed(MutableBorrowingLiteral literal) override; + void AttachMemorySpace(PjRtMemorySpace* memory_space); + absl::Span memory_spaces() const override; absl::StatusOr default_memory_space() const override; + absl::StatusOr memory_space_by_kind( + absl::string_view memory_space_kind) const override; + + absl::StatusOr memory_space_by_kind_id(int id) const; + // Returns a semaphore for admission control on inflight computations. Semaphore& max_inflight_computations_semaphore() { return max_inflight_computations_semaphore_; @@ -250,6 +257,8 @@ class TfrtCpuDevice final : public PjRtDevice { private: PjRtClient* client_ = nullptr; TfrtCpuDeviceDescription description_; + absl::InlinedVector memory_spaces_; + absl::flat_hash_map memory_spaces_by_id_; // TODO(zhangqiaorjc): Optimize semaphore related overhead. 
// Semaphore used to limit how many programs can be enqueued by the host @@ -335,10 +344,7 @@ class TfrtCpuClient final : public PjRtClient { absl::StatusOr> CreateBuffersForAsyncHostToDevice(absl::Span shapes, - PjRtMemorySpace* memory_space) override { - return Unimplemented( - "CreateBuffersForAsyncHostToDevice with memory_space not implemented."); - } + PjRtMemorySpace* memory_space) override; absl::StatusOr> BufferFromHostBuffer( const void* data, PrimitiveType type, absl::Span dims, @@ -347,9 +353,26 @@ class TfrtCpuClient final : public PjRtClient { absl::AnyInvocable on_done_with_host_buffer, PjRtDevice* device) override; + absl::StatusOr> BufferFromHostBuffer( + const void* data, PrimitiveType type, absl::Span dims, + std::optional> byte_strides, + HostBufferSemantics host_buffer_semantics, + absl::AnyInvocable on_done_with_host_buffer, + PjRtDevice* device, const Layout* device_layout) override; + + absl::StatusOr> BufferFromHostBuffer( + const void* data, PrimitiveType type, absl::Span dims, + std::optional> byte_strides, + HostBufferSemantics host_buffer_semantics, + absl::AnyInvocable on_done_with_host_buffer, + PjRtMemorySpace* memory_space, const Layout* device_layout) override; + absl::StatusOr> BufferFromHostLiteral( const LiteralSlice& literal, PjRtDevice* device) override; + absl::StatusOr> BufferFromHostLiteral( + const LiteralSlice& literal, PjRtMemorySpace* memory_space) override; + absl::StatusOr>> MakeCrossHostReceiveBuffers(absl::Span shapes, PjRtDevice* device, @@ -433,6 +456,11 @@ class TfrtCpuClient final : public PjRtClient { std::vector addressable_devices_; std::unique_ptr computation_placer_; + // Addressable memory spaces. + std::vector> owned_memory_spaces_; + // Pointers to `owned_memory_spaces_`. + std::vector memory_spaces_; + // Thread pool for running PjRtClient tasks. std::unique_ptr pjrt_client_thread_pool_; std::unique_ptr async_work_runner_; @@ -482,14 +510,15 @@ class TfrtCpuBuffer final : public AbstractTfrtCpuBuffer { TfrtCpuBuffer( Shape on_device_shape, std::unique_ptr tracked_device_buffer, - TfrtCpuClient* client, TfrtCpuDevice* device); + TfrtCpuClient* client, TfrtCpuDevice* device, + PjRtMemorySpace* memory_space); TfrtCpuBuffer(const TfrtCpuBuffer&) = delete; TfrtCpuBuffer(TfrtCpuBuffer&&) = delete; TfrtCpuBuffer& operator=(const TfrtCpuBuffer&) = delete; TfrtCpuBuffer& operator=(TfrtCpuBuffer&&) = delete; - PjRtMemorySpace* memory_space() const override { return nullptr; } + PjRtMemorySpace* memory_space() const override { return memory_space_; } TfrtCpuDevice* device() const override { return device_; } TfrtCpuClient* client() const override { return client_; } @@ -502,11 +531,15 @@ class TfrtCpuBuffer final : public AbstractTfrtCpuBuffer { absl::StatusOr> CopyToDevice( PjRtDevice* dst_device) override; + absl::StatusOr> CopyToMemorySpace( + PjRtMemorySpace* dst_memory_space) override; + private: absl::string_view buffer_name() const override { return "TfrtCpuBuffer"; } TfrtCpuClient* client_; TfrtCpuDevice* const device_; + PjRtMemorySpace* const memory_space_; }; class TfrtCpuExecutable final : public PjRtLoadedExecutable { diff --git a/third_party/xla/xla/pjrt/cpu/cpu_client_test.cc b/third_party/xla/xla/pjrt/cpu/cpu_client_test.cc index f8b343d4313f10..1b8075b11013d5 100644 --- a/third_party/xla/xla/pjrt/cpu/cpu_client_test.cc +++ b/third_party/xla/xla/pjrt/cpu/cpu_client_test.cc @@ -15,6 +15,8 @@ limitations under the License. 
#include "xla/pjrt/cpu/cpu_client.h" +#include "xla/pjrt/host_memory_spaces.h" + #ifndef _WIN32 #include #endif @@ -50,9 +52,11 @@ namespace xla { namespace { using ::testing::Each; +using ::testing::ElementsAre; using ::testing::ElementsAreArray; using ::testing::HasSubstr; using ::testing::IsFalse; +using ::tsl::testing::IsOkAndHolds; static absl::Status TestError(ffi::BufferBase, ffi::Result, ffi::Result) { @@ -69,6 +73,22 @@ XLA_FFI_DEFINE_HANDLER(kTestError, TestError, XLA_FFI_REGISTER_HANDLER(ffi::GetXlaFfiApi(), "__xla_test$$TestError", "Host", kTestError); +TEST(TfrtCpuClientTest, MemorySpace) { + TF_ASSERT_OK_AND_ASSIGN(auto client, GetTfrtCpuClient(CpuClientOptions())); + ASSERT_GE(client->devices().size(), 1); + + ASSERT_EQ(client->memory_spaces().size(), + client->addressable_devices().size()); + for (auto* device : client->devices()) { + TF_ASSERT_OK_AND_ASSIGN(auto* memory_space, device->default_memory_space()); + EXPECT_THAT(device->memory_spaces(), ElementsAre(memory_space)); + EXPECT_EQ(memory_space->kind(), UnpinnedHostMemorySpace::kKind); + EXPECT_EQ(memory_space->kind_id(), UnpinnedHostMemorySpace::kKindId); + EXPECT_THAT(device->memory_space_by_kind(UnpinnedHostMemorySpace::kKind), + IsOkAndHolds(memory_space)); + } +} + TEST(TfrtCpuClientTest, DonationWithExecutionError) { constexpr char kProgram[] = R"(HloModule DonationWithExecutionError, input_output_alias={ {}: (0, {}, must-alias) } From 7e1a1b30710826abb7ab43a9bff765fee1c2cbb1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 May 2024 11:18:18 -0700 Subject: [PATCH 039/478] Prevent heartbeat cancelled errors at shutdown. Clients disconnect before the service shuts down. However, there is a short time window where the clients send out heartbeat RPCs right before shutting down, which might reach the server after it shuts down. PiperOrigin-RevId: 633273384 --- .../coordination_service_agent.cc | 35 +++++++++++-------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/third_party/xla/xla/tsl/distributed_runtime/coordination/coordination_service_agent.cc b/third_party/xla/xla/tsl/distributed_runtime/coordination/coordination_service_agent.cc index 59d1abf1b80956..36969eb0025205 100644 --- a/third_party/xla/xla/tsl/distributed_runtime/coordination/coordination_service_agent.cc +++ b/third_party/xla/xla/tsl/distributed_runtime/coordination/coordination_service_agent.cc @@ -45,7 +45,6 @@ limitations under the License. #include "tsl/platform/mutex.h" #include "tsl/platform/random.h" #include "tsl/platform/status.h" -#include "tsl/platform/statusor.h" #include "tsl/platform/thread_annotations.h" #include "tsl/protobuf/coordination_config.pb.h" #include "tsl/protobuf/coordination_service.pb.h" @@ -72,7 +71,9 @@ class CoordinationServiceAgentImpl : public CoordinationServiceAgent { public: CoordinationServiceAgentImpl() = default; ~CoordinationServiceAgentImpl() override { - absl::Status s = Shutdown(); + // TODO(b/339231167): Fix the lint. 
+ absl::Status s = + Shutdown(); // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall) VLOG(3) << "Coordination agent dtor failed with status: " << s; } absl::Status Initialize(Env* env, std::string_view job_name, int task_id, @@ -230,7 +231,7 @@ void CoordinationServiceAgentImpl::StopHeartbeat() { shutting_down_ = true; heartbeat_thread_cv_.notify_all(); } - heartbeat_thread_.reset(); + heartbeat_thread_ = nullptr; } absl::Status CoordinationServiceAgentImpl::Connect() { @@ -331,17 +332,21 @@ absl::Status CoordinationServiceAgentImpl::Connect() { }); n.WaitForNotification(); VLOG(10) << "HeartbeatResponse: " << status; - { - mutex_lock l(heartbeat_thread_shutdown_mu_); + if (!status.ok()) { // Ignore heartbeat errors and exit thread if shutting down. For - // example, the agent may send a heartbeat right after Shutdown(), - // but before StopHeartbeat(). This results in an unexpected - // heartbeat error. - if (shutting_down_) { - return; + // example, the agent may send a heartbeat right after Shutdown() + // started, but before StopHeartbeat() and end of Shutdown(). This + // results in an unexpected heartbeat error. + // Waiting for a second allows us to identify if errors are due to + // inflight heartbeats sent during shutdown and can be ignored. + absl::SleepFor(absl::Seconds(1)); + { + mutex_lock l(heartbeat_thread_shutdown_mu_); + + if (shutting_down_) { + return; + } } - } - if (!status.ok()) { SetError(status); } else if (response.leader_incarnation() != leader_incarnation_) { SetError(MakeCoordinationError( @@ -351,8 +356,10 @@ absl::Status CoordinationServiceAgentImpl::Connect() { // Send next heartbeat after an interval. { mutex_lock l(heartbeat_thread_shutdown_mu_); + // TODO(b/339231167): Fix the lint. heartbeat_thread_cv_.wait_for( - l, std::chrono::milliseconds(heartbeat_interval_ms)); + l, std::chrono::milliseconds( // NOLINT(misc-include-cleaner) + heartbeat_interval_ms)); if (shutting_down_) { return; } @@ -583,7 +590,7 @@ absl::StatusOr CoordinationServiceAgentImpl::GetKeyValue( absl::StatusOr CoordinationServiceAgentImpl::GetKeyValue( std::string_view key, absl::Duration timeout) { auto n = std::make_shared(); - auto result = std::make_shared>(); + auto result = std::make_shared>(); GetKeyValueAsync( key, [n, result](const absl::StatusOr& status_or_value) { *result = status_or_value; From a53fd126c00f96c8a8b53e6853b17f9825aa5a26 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 May 2024 11:39:45 -0700 Subject: [PATCH 040/478] Add fields to GpuTopology for multi-host support. 
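To make the new topology hierarchy concrete, here is a small usage sketch (illustrative only, not part of the patch; the constructor arguments mirror the updated test below):

    GpuTopology topology(/*gpu_device_ids=*/{0, 1, 2, 3, 4, 5},
                         /*platform_version=*/"platform_version",
                         /*num_slices=*/2,
                         /*num_hosts_per_slice=*/1,
                         /*num_devices_per_host=*/3);
    // number_of_hosts() == num_slices * num_hosts_per_slice == 2.
    // number_of_devices() == number_of_hosts() * num_devices_per_host == 6,
    // i.e. it is now derived from the slice/host hierarchy rather than from
    // gpu_device_ids.size().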
PiperOrigin-RevId: 633280790 --- third_party/xla/xla/pjrt/gpu/gpu_topology.cc | 7 ++++- third_party/xla/xla/pjrt/gpu/gpu_topology.h | 29 +++++++++++++++++-- .../xla/xla/pjrt/gpu/se_gpu_pjrt_client.h | 8 +++-- .../xla/pjrt/gpu/se_gpu_pjrt_client_test.cc | 14 ++++++++- 4 files changed, 50 insertions(+), 8 deletions(-) diff --git a/third_party/xla/xla/pjrt/gpu/gpu_topology.cc b/third_party/xla/xla/pjrt/gpu/gpu_topology.cc index b7593448646d99..e9baf5f359ba66 100644 --- a/third_party/xla/xla/pjrt/gpu/gpu_topology.cc +++ b/third_party/xla/xla/pjrt/gpu/gpu_topology.cc @@ -25,13 +25,18 @@ std::unique_ptr GpuTopology::FromProto( return std::make_unique( std::vector{gpu_topology_proto.device_ids().begin(), gpu_topology_proto.device_ids().end()}, - gpu_topology_proto.platform_version()); + gpu_topology_proto.platform_version(), gpu_topology_proto.num_slices(), + gpu_topology_proto.num_hosts_per_slice(), + gpu_topology_proto.num_devices_per_host()); } GpuTopologyProto GpuTopology::ToProto() const { GpuTopologyProto proto; proto.mutable_device_ids()->Add(device_ids().begin(), device_ids().end()); proto.set_platform_version(platform_version()); + proto.set_num_slices(num_slices()); + proto.set_num_hosts_per_slice(num_hosts_per_slice()); + proto.set_num_devices_per_host(num_devices_per_host()); return proto; } diff --git a/third_party/xla/xla/pjrt/gpu/gpu_topology.h b/third_party/xla/xla/pjrt/gpu/gpu_topology.h index 1412376b8a7be7..9c1e191bc4b72b 100644 --- a/third_party/xla/xla/pjrt/gpu/gpu_topology.h +++ b/third_party/xla/xla/pjrt/gpu/gpu_topology.h @@ -26,21 +26,44 @@ namespace xla { class GpuTopology { public: explicit GpuTopology(const std::vector& gpu_device_ids, - absl::string_view platform_version) - : devices_ids_(gpu_device_ids), platform_version_(platform_version) {} + absl::string_view platform_version, int32_t num_slices, + int32_t num_hosts_per_slice, + int32_t num_devices_per_host) + : devices_ids_(gpu_device_ids), + platform_version_(platform_version), + num_slices_(num_slices), + num_hosts_per_slice_(num_hosts_per_slice), + num_devices_per_host_(num_devices_per_host) {} - int number_of_devices() const { return devices_ids_.size(); } + bool operator==(const GpuTopology& other) const { + return devices_ids_ == other.devices_ids_ && + platform_version_ == other.platform_version_ && + num_slices_ == other.num_slices_ && + num_hosts_per_slice_ == other.num_hosts_per_slice_ && + num_devices_per_host_ == other.num_devices_per_host_; + } + + int number_of_devices() const { + return number_of_hosts() * num_devices_per_host_; + } const std::vector& device_ids() const { return devices_ids_; } + int number_of_hosts() const { return num_slices_ * num_hosts_per_slice_; } static std::unique_ptr FromProto( const GpuTopologyProto& proto); GpuTopologyProto ToProto() const; std::string platform_version() const { return platform_version_; } + int32_t num_slices() const { return num_slices_; } + int32_t num_hosts_per_slice() const { return num_hosts_per_slice_; } + int32_t num_devices_per_host() const { return num_devices_per_host_; } private: const std::vector devices_ids_; const std::string platform_version_; + const int32_t num_slices_; + const int32_t num_hosts_per_slice_; + const int32_t num_devices_per_host_; }; } // namespace xla diff --git a/third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_client.h b/third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_client.h index 90717f87fbe6c5..2101501050a670 100644 --- a/third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_client.h +++ b/third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_client.h @@ 
-87,15 +87,17 @@ class StreamExecutorGpuTopologyDescription : public PjRtTopologyDescription { : platform_id_(platform_id), platform_name_(platform_name), platform_version_(platform_version), - gpu_topology_(gpu_device_ids, platform_version), + // TODO(b/331224674): Add support for multi-host. + gpu_topology_(gpu_device_ids, platform_version, /*num_slices=*/1, + /*num_hosts_per_slice=*/1, + /*num_devices_per_host=*/gpu_device_ids.size()), attributes_(attributes) {} bool operator==(const StreamExecutorGpuTopologyDescription& other) const { return this->platform_id() == other.platform_id() && this->platform_name() == other.platform_name() && this->platform_version() == other.platform_version() && - this->gpu_topology().device_ids() == - other.gpu_topology().device_ids(); + this->gpu_topology() == other.gpu_topology(); } PjRtPlatformId platform_id() const override { return platform_id_; } diff --git a/third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_client_test.cc b/third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_client_test.cc index 5a14973d215cc2..d3bb175754e1fb 100644 --- a/third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_client_test.cc +++ b/third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_client_test.cc @@ -640,20 +640,32 @@ TEST(GpuTopology, FromProto) { R"pb( device_ids: [ 3, 2, 1 ] platform_version: "platform_version" + num_slices: 2 + num_hosts_per_slice: 1 + num_devices_per_host: 3 )pb", &msg)); std::unique_ptr gpu_topology = GpuTopology::FromProto(msg); EXPECT_THAT(gpu_topology->device_ids(), ElementsAre(3, 2, 1)); EXPECT_THAT(gpu_topology->platform_version(), "platform_version"); + EXPECT_THAT(gpu_topology->num_slices(), 2); + EXPECT_THAT(gpu_topology->num_hosts_per_slice(), 1); + EXPECT_THAT(gpu_topology->num_devices_per_host(), 3); } TEST(GpuTopology, ToProto) { GpuTopology gpu_topology(/*gpu_device_ids=*/{3, 2, 1}, - /*platform_version=*/"platform_version"); + /*platform_version=*/"platform_version", + /*num_slices=*/2, + /*num_hosts_per_slice=*/1, + /*num_devices_per_host=*/3); GpuTopologyProto msg = gpu_topology.ToProto(); EXPECT_THAT(msg.device_ids(), ElementsAre(3, 2, 1)); EXPECT_THAT(msg.platform_version(), "platform_version"); + EXPECT_THAT(msg.num_slices(), 2); + EXPECT_THAT(msg.num_hosts_per_slice(), 1); + EXPECT_THAT(msg.num_devices_per_host(), 3); } TEST(StreamExecutorGpuClientTest, DistributedInit) { From f24bdda19768b8fcd230326d5b6e1b0b27be4fce Mon Sep 17 00:00:00 2001 From: Parker Schuh Date: Mon, 13 May 2024 11:44:31 -0700 Subject: [PATCH 041/478] manual dims must be excluded when computing the new tile assignment, or num_new_tiles will always be > NumTiles() + 1 and sharding propagation will infinitely reassign the same partial manual shardings. 
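To illustrate with the regression test added below (numbers read off that test): the sharding {devices=[16,4]<=[64] last_tile_dims={manual, replicated}} has no data tile dimensions, so NumTiles() is 1 and a candidate merge must reach minimum_tiles = NumTiles() + 1 = 2. Before this change, the manual subgroup of size 16 was folded into merged_tile_dims before the minimum_tiles check, so new_num_tiles appeared to be 16 >= 2 and the no-op merge was accepted over and over; with manual dims excluded, new_num_tiles is 1 < 2 and MergeShardingIfCompatible now correctly returns false.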
PiperOrigin-RevId: 633282263 --- .../xla/xla/hlo/utils/hlo_sharding_util.cc | 19 +++++++++++++------ .../xla/hlo/utils/hlo_sharding_util_test.cc | 11 +++++++++++ 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/third_party/xla/xla/hlo/utils/hlo_sharding_util.cc b/third_party/xla/xla/hlo/utils/hlo_sharding_util.cc index 96a2506d59ce5b..0133703e0abd68 100644 --- a/third_party/xla/xla/hlo/utils/hlo_sharding_util.cc +++ b/third_party/xla/xla/hlo/utils/hlo_sharding_util.cc @@ -352,6 +352,13 @@ bool MergeShardingIfCompatible(const HloSharding& to_merge, } } + const int64_t num_devices = to_merge.tile_assignment().num_elements(); + const int64_t new_num_tiles = Product(merged_tile_dims); + if (num_devices % new_num_tiles != 0 || new_num_tiles < minimum_tiles) { + return false; + } + int64_t replication; + if (to_merge_man_dim >= 0) { int64_t man_group_size = to_merge.tile_assignment().dim(to_merge_man_dim); if (man_group_size != dst->tile_assignment().dim(dst_man_dim)) { @@ -365,14 +372,14 @@ bool MergeShardingIfCompatible(const HloSharding& to_merge, merged_tile_dims.push_back(man_group_size); num_merge_groups *= man_group_size; num_dst_groups *= man_group_size; + if (num_devices % (new_num_tiles * man_group_size) != 0) { + return false; + } + replication = num_devices / (new_num_tiles * man_group_size); + } else { + replication = num_devices / new_num_tiles; } - const int64_t num_devices = to_merge.tile_assignment().num_elements(); - const int64_t new_num_tiles = Product(merged_tile_dims); - if (num_devices % new_num_tiles != 0 || new_num_tiles < minimum_tiles) { - return false; - } - const int64_t replication = num_devices / new_num_tiles; if (replication > 1) { merged_tile_dims.push_back(replication); } diff --git a/third_party/xla/xla/hlo/utils/hlo_sharding_util_test.cc b/third_party/xla/xla/hlo/utils/hlo_sharding_util_test.cc index 126f414c789609..c9902e6a6ad14c 100644 --- a/third_party/xla/xla/hlo/utils/hlo_sharding_util_test.cc +++ b/third_party/xla/xla/hlo/utils/hlo_sharding_util_test.cc @@ -529,6 +529,17 @@ TEST(HloShardingUtilTest, EXPECT_EQ(result, output_sharding); } +TEST(HloShardingUtilTest, MergeManualSubgroupSharding) { + TileAssignment tile_assignment({16, 4}); + std::vector subgroup_types = {OpSharding::MANUAL, + OpSharding::REPLICATED}; + // Subgroup sharding + // {devices=[16,4]<=[64] last_tile_dims={manual, replicated}} + HloSharding dst = HloSharding::Subgroup(tile_assignment, subgroup_types); + HloSharding to_merge = dst; + EXPECT_FALSE(MergeShardingIfCompatible(to_merge, dst.NumTiles() + 1, &dst)); +} + TEST(HloShardingUtilTest, GetManualSubgroupSharding_ManualOnly) { TileAssignment tile_assignment({1, 2, 2}); std::vector subgroup_types = {OpSharding::MANUAL}; From dadff518bb1eace98ad3f3d7ceed81319736cfbe Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 May 2024 11:53:10 -0700 Subject: [PATCH 042/478] Update ops-related pbtxt files. PiperOrigin-RevId: 633284964 --- tensorflow/core/ops/ops.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 68d78d0329a9ac..53c362ed0ab819 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -1,4 +1,4 @@ -go/nodeserialize +go/debugstr op { name: "Abort" attr { From 335abf5f52a0f2bb678cf93ca701de1245c5c86f Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Mon, 13 May 2024 12:09:05 -0700 Subject: [PATCH 043/478] Fix populate of supported_subgroup_sizes PiperOrigin-RevId: 633290357 --- tensorflow/lite/delegates/gpu/cl/cl_device.cc | 27 +++++++++---------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/cl_device.cc b/tensorflow/lite/delegates/gpu/cl/cl_device.cc index 852cc95eedf261..4f3ea93b1cf792 100644 --- a/tensorflow/lite/delegates/gpu/cl/cl_device.cc +++ b/tensorflow/lite/delegates/gpu/cl/cl_device.cc @@ -344,21 +344,20 @@ GpuInfo GpuInfoFromDeviceID(cl_device_id id, cl_platform_id platform_id) { } } - if (info.IsIntel()) { - if (info.SupportsExtension("cl_intel_required_subgroup_size")) { - size_t sub_groups_count; - cl_int status = - clGetDeviceInfo(id, 0x4108 /*CL_DEVICE_SUB_GROUP_SIZES_INTEL*/, 0, - nullptr, &sub_groups_count); + if (info.SupportsExtension("cl_intel_required_subgroup_size")) { + size_t sub_groups_ret_size; + cl_int status = + clGetDeviceInfo(id, 0x4108 /*CL_DEVICE_SUB_GROUP_SIZES_INTEL*/, 0, + nullptr, &sub_groups_ret_size); + if (status == CL_SUCCESS) { + size_t sub_groups_count = sub_groups_ret_size / sizeof(size_t); + std::vector sub_group_sizes(sub_groups_count); + status = + clGetDeviceInfo(id, 0x4108 /*CL_DEVICE_SUB_GROUP_SIZES_INTEL*/, + sub_groups_ret_size, sub_group_sizes.data(), nullptr); if (status == CL_SUCCESS) { - std::vector sub_group_sizes(sub_groups_count); - status = clGetDeviceInfo(id, 0x4108 /*CL_DEVICE_SUB_GROUP_SIZES_INTEL*/, - sizeof(size_t) * sub_groups_count, - sub_group_sizes.data(), nullptr); - if (status == CL_SUCCESS) { - for (int i = 0; i < sub_groups_count; ++i) { - info.supported_subgroup_sizes.push_back(sub_group_sizes[i]); - } + for (int i = 0; i < sub_groups_count; ++i) { + info.supported_subgroup_sizes.push_back(sub_group_sizes[i]); } } } From 88c4ac653ec8e04644a28dbbb25dbe5987a20a83 Mon Sep 17 00:00:00 2001 From: Mason Chang Date: Mon, 13 May 2024 12:11:09 -0700 Subject: [PATCH 044/478] Keep identity ops that have the same input/output device assignment on the same devices PiperOrigin-RevId: 633290991 --- tensorflow/core/common_runtime/placer.cc | 39 +++++- tensorflow/core/common_runtime/placer_test.cc | 126 ++++++++++++++++++ 2 files changed, 164 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/common_runtime/placer.cc b/tensorflow/core/common_runtime/placer.cc index 05dc029cc74756..5908b26a898690 100644 --- a/tensorflow/core/common_runtime/placer.cc +++ b/tensorflow/core/common_runtime/placer.cc @@ -27,6 +27,7 @@ limitations under the License. #include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/graph/graph.h" #include "tensorflow/core/graph/graph_node_util.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/path.h" @@ -117,6 +118,42 @@ bool IsGeneratorNode(const Node* node) { !IsRefType(node->output_type(0)); } +// If a node is an Identity op with input and output on the same device, +// assign this Identity the same device. If the node already has a requested +// or assigned device, don't touch it. +bool MatchIdentityOperation(const Node* node) { + if (!node) { + return false; + } + + if (!node->IsIdentity()) { + return false; + } + + if (node->has_assigned_device_name()) { + return false; + } + + if (!node->requested_device().empty()) { + return false; + } + + // Strictly only check for IDENTITY nodes with only 1 input and + // 1 output edge. 
+ if (node->in_edges().size() != 1) { + return false; + } + + if (node->out_edges().size() != 1) { + return false; + } + + const Node* input = *node->in_nodes().begin(); + const Node* output = *node->out_nodes().begin(); + + return input->requested_device() == output->requested_device(); +} + void LogDeviceAssignment(const Node* node, bool log_device_placement) { // Log placement if log_device_placement is set. if (log_device_placement) { @@ -257,7 +294,7 @@ Status Placer::Run(const GraphOptimizationPassOptions& options) { // Heuristic B: If the node only operates on metadata, not data, // then it is desirable to place that metadata node with its // input. - if (IsMetadata(node)) { + if (IsMetadata(node) || MatchIdentityOperation(node)) { // Make sure that the input device type is in the list of supported // device types for this node. const Node* input = (*node->in_edges().begin())->src(); diff --git a/tensorflow/core/common_runtime/placer_test.cc b/tensorflow/core/common_runtime/placer_test.cc index aeca73546468c0..6d76bc7d12c86e 100644 --- a/tensorflow/core/common_runtime/placer_test.cc +++ b/tensorflow/core/common_runtime/placer_test.cc @@ -213,6 +213,8 @@ REGISTER_OP("TestTypedConsumer").Input("i: variant"); REGISTER_KERNEL_BUILDER(Name("TestTypedConsumer").Device("FakeCPU"), DummyOp); REGISTER_KERNEL_BUILDER(Name("TestTypedConsumer").Device("FakeGPU"), DummyOp); +REGISTER_OP("ConvertToListOfCooTensorsV2").Input("i: int32"); + //////////////////////////////////////////////////////////////////////////////// // // A PlacerTest method has three phases: @@ -1948,6 +1950,9 @@ REGISTER_KERNEL_BUILDER(Name("Add").Device("FakeCPU"), DummyOp); REGISTER_KERNEL_BUILDER(Name("Add").Device("FakeGPU"), DummyOp); REGISTER_KERNEL_BUILDER(Name("PartitionedCall").Device("FakeCPU"), DummyOp); REGISTER_KERNEL_BUILDER(Name("PartitionedCall").Device("FakeGPU"), DummyOp); +REGISTER_KERNEL_BUILDER(Name("ConvertToListOfCooTensorsV2").Device("FakeCPU"), + DummyOp); +REGISTER_KERNEL_BUILDER(Name("Cast").Device("FakeCPU"), DummyOp); TEST_P(SoftPlacementPlacerTest, RequestedDeviceOnResourceGeneratorIsTreatedAsAssigned) { @@ -3108,5 +3113,126 @@ TEST_F(NestedPlacerTest, IndirectRecursion) { << s.ToString(); } +TEST_F(PlacerTest, IdentityMatchesInputAndOutputPlacement) { + /* + * Op Input (assigned to task:1) + * | + * v + * // Tests that this gets reassigned to task:1 + * Identity (No Assignment) + * | + * v + * Op Output (assigned to task:1) + */ + const std::string task0_device = "/job:b/replica:0/task:0/device:FakeCPU:0"; + const std::string task1_device = "/job:b/replica:0/task:1/device:FakeCPU:0"; + + GraphDef graph = GDef({ + NDef("a", "_Arg", {}, {{"T", DT_FLOAT}}, task1_device), + NDef("identity1", "Identity", {"a"}, {{"T", DT_FLOAT}}, task1_device), + NDef("identity2", "Identity", {"identity1:0"}, {{"T", DT_FLOAT}}), + NDef("cast", "Cast", {"identity2:0"}, + {{"SrcT", DT_FLOAT}, {"DstT", DT_INT32}}, task1_device), + NDef("COO", "ConvertToListOfCooTensorsV2", {"cast:0"}, {{"T", DT_INT32}}, + task1_device), + }); + + Graph g(OpRegistry::Global()); + + DeviceSet multiple_tasks; + std::unique_ptr task0_cpu(FakeDevice::MakeCPU(task0_device)); + multiple_tasks.AddDevice(task0_cpu.get()); + + std::unique_ptr task1_cpu(FakeDevice::MakeCPU(task1_device)); + multiple_tasks.AddDevice(task1_cpu.get()); + + TF_ASSERT_OK(BuildGraph(graph, &g)); + + absl::Status s = Place(&g, &multiple_tasks); + TF_ASSERT_OK(s); + + Node* identity2 = GetNodeByName(g, "identity2"); + EXPECT_EQ(identity2->assigned_device_name().c_str(), 
task1_device); +} + +TEST_F(PlacerTest, IdentityWithoutOutputDoesntCrash) { + /* + * Op Input (assigned to task:1) + * | + * v + * // Tests that this doesn't crash. + * Identity (No output) + */ + const std::string task0_device = "/job:b/replica:0/task:0/device:FakeCPU:0"; + const std::string task1_device = "/job:b/replica:0/task:1/device:FakeCPU:0"; + + GraphDef graph = GDef({ + NDef("a", "_Arg", {}, {{"T", DT_FLOAT}}, task1_device), + NDef("identity1", "Identity", {"a"}, {{"T", DT_FLOAT}}, task1_device), + NDef("identity2", "Identity", {"identity1:0"}, {{"T", DT_FLOAT}}), + }); + + Graph g(OpRegistry::Global()); + + DeviceSet multiple_tasks; + std::unique_ptr task0_cpu(FakeDevice::MakeCPU(task0_device)); + multiple_tasks.AddDevice(task0_cpu.get()); + + std::unique_ptr task1_cpu(FakeDevice::MakeCPU(task1_device)); + multiple_tasks.AddDevice(task1_cpu.get()); + + TF_ASSERT_OK(BuildGraph(graph, &g)); + Node* identity2 = GetNodeByName(g, "identity2"); + const Edge* out_edge = *identity2->out_edges().begin(); + + g.RemoveEdge(out_edge); + + absl::Status s = Place(&g, &multiple_tasks); + TF_ASSERT_OK(s); +} + +TEST_F(PlacerTest, IdentityDoesntMatchWithMultipleOutput) { + /* + * Op Input (assigned to task:1) + * | + * v + * // Tests that identity gets assigned to default task:0 + * Identity (No Assignment) + * | + * v + * Multiple Op Output (assigned to task:1) + */ + const std::string task0_device = "/job:b/replica:0/task:0/device:FakeCPU:0"; + const std::string task1_device = "/job:b/replica:0/task:1/device:FakeCPU:0"; + + GraphDef graph = GDef({ + NDef("a", "_Arg", {}, {{"T", DT_FLOAT}}, task1_device), + NDef("identity1", "Identity", {"a"}, {{"T", DT_FLOAT}}, task1_device), + NDef("identity2", "Identity", {"identity1:0"}, {{"T", DT_FLOAT}}), + NDef("cast", "Cast", {"identity2:0"}, + {{"SrcT", DT_FLOAT}, {"DstT", DT_INT32}}, task1_device), + NDef("COO", "ConvertToListOfCooTensorsV2", {"cast:0"}, {{"T", DT_INT32}}, + task1_device), + NDef("identity3", "Identity", {"identity2:0"}, {{"T", DT_FLOAT}}), + }); + + Graph g(OpRegistry::Global()); + + DeviceSet multiple_tasks; + std::unique_ptr task0_cpu(FakeDevice::MakeCPU(task0_device)); + multiple_tasks.AddDevice(task0_cpu.get()); + + std::unique_ptr task1_cpu(FakeDevice::MakeCPU(task1_device)); + multiple_tasks.AddDevice(task1_cpu.get()); + + TF_ASSERT_OK(BuildGraph(graph, &g)); + + absl::Status s = Place(&g, &multiple_tasks); + TF_ASSERT_OK(s); + + Node* identity2 = GetNodeByName(g, "identity2"); + EXPECT_EQ(identity2->assigned_device_name().c_str(), task0_device); +} + } // namespace } // namespace tensorflow From 6607b1f2e6d2aa9b06669bf8a69b9f628212e6c6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 May 2024 12:26:49 -0700 Subject: [PATCH 045/478] Fix C++ dtor bypass virtual call dispatch lint. 
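During destruction a virtual call no longer dispatches to the most-derived override, which is why clang-analyzer's optin.cplusplus.VirtualCall check flags virtual calls made from a destructor. The fix routes both the destructor and the virtual Shutdown() through a non-virtual helper. A minimal sketch of the pattern, with class names shortened from the diff below (it assumes a base class with a virtual destructor and a virtual Shutdown()):

  class AgentImpl : public Agent {
   public:
    ~AgentImpl() override {
      // Non-virtual call, so no dynamic dispatch happens mid-destruction.
      absl::Status s = ShutdownInternal();  // Logged via VLOG in the real code.
    }
    absl::Status Shutdown() override { return ShutdownInternal(); }

   private:
    absl::Status ShutdownInternal();  // Does the actual cleanup work.
  };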
PiperOrigin-RevId: 633295761 --- .../coordination/coordination_service_agent.cc | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/third_party/xla/xla/tsl/distributed_runtime/coordination/coordination_service_agent.cc b/third_party/xla/xla/tsl/distributed_runtime/coordination/coordination_service_agent.cc index 36969eb0025205..1d090c7a0ffbc7 100644 --- a/third_party/xla/xla/tsl/distributed_runtime/coordination/coordination_service_agent.cc +++ b/third_party/xla/xla/tsl/distributed_runtime/coordination/coordination_service_agent.cc @@ -71,9 +71,7 @@ class CoordinationServiceAgentImpl : public CoordinationServiceAgent { public: CoordinationServiceAgentImpl() = default; ~CoordinationServiceAgentImpl() override { - // TODO(b/339231167): Fix the lint. - absl::Status s = - Shutdown(); // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall) + absl::Status s = ShutdownInternal(); VLOG(3) << "Coordination agent dtor failed with status: " << s; } absl::Status Initialize(Env* env, std::string_view job_name, int task_id, @@ -139,6 +137,8 @@ class CoordinationServiceAgentImpl : public CoordinationServiceAgent { void StopHeartbeat(); private: + absl::Status ShutdownInternal(); + Env* env_ = nullptr; // Not owned. const uint64_t incarnation_id_ = random::New64(); CoordinatedTask task_; @@ -477,6 +477,10 @@ absl::Status CoordinationServiceAgentImpl::ReportError( } absl::Status CoordinationServiceAgentImpl::Shutdown() { + return ShutdownInternal(); +} + +absl::Status CoordinationServiceAgentImpl::ShutdownInternal() { absl::Status status = absl::OkStatus(); bool is_connected = false; { From 86ea1d18bf750ba46c3998422b84eb2c6c3cd306 Mon Sep 17 00:00:00 2001 From: Antonio Sanchez Date: Mon, 13 May 2024 12:29:26 -0700 Subject: [PATCH 046/478] Propagate status from sparse reduction op to avoid crash. Fixes #65865 PiperOrigin-RevId: 633296435 --- tensorflow/core/kernels/BUILD | 2 +- tensorflow/core/kernels/sparse_reduce_op.cc | 26 ++++++++++++++------- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index c541597c8d9d80..158ace64ba184b 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -5138,7 +5138,7 @@ tf_kernel_library( tf_kernel_library( name = "sparse_reduce_op", prefix = "sparse_reduce_op", - deps = SPARSE_DEPS, + deps = SPARSE_DEPS + ["@com_google_absl//absl/status"], ) tf_kernel_library( diff --git a/tensorflow/core/kernels/sparse_reduce_op.cc b/tensorflow/core/kernels/sparse_reduce_op.cc index 348a73e0816280..8f875053c66579 100644 --- a/tensorflow/core/kernels/sparse_reduce_op.cc +++ b/tensorflow/core/kernels/sparse_reduce_op.cc @@ -17,6 +17,7 @@ limitations under the License. #define EIGEN_USE_THREADS +#include "absl/status/status.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/op_requires.h" #include "tensorflow/core/framework/register_types.h" @@ -47,13 +48,14 @@ struct ReduceDetails { // Compute common reduce parameters that'll be used for SparseTensor // reductions. Usage: -// ReduceDetails reduction = SparseTensorReduceHelper(sp, axes, keep_dims); -// sp.Reorder(reduction.reorder_dims); -// for (const auto& g : sp.group(reduction.group_by_dims)) { +// StatusOr<ReduceDetails> reduction = +// SparseTensorReduceHelper(sp, axes, keep_dims); +// sp.Reorder(reduction->reorder_dims); +// for (const auto& g : sp.group(reduction->group_by_dims)) { // ... // } -// // Set output shape to reduction.reduced_shape.
-ReduceDetails SparseTensorReduceHelper(const SparseTensor &sp, +// // Set output shape to reduction->reduced_shape. +absl::StatusOr<ReduceDetails> SparseTensorReduceHelper(const SparseTensor &sp, absl::Span<const int32_t> axes_slice, bool keep_dims) { ReduceDetails reduction; @@ -101,7 +103,11 @@ ReduceDetails SparseTensorReduceHelper(const SparseTensor &sp, out_dim_sizes = sp.PickDims(reduction.group_by_dims); } - reduction.reduced_shape = TensorShape(out_dim_sizes); + absl::Status success = + TensorShape::BuildTensorShape(out_dim_sizes, &reduction.reduced_shape); + if (!success.ok()) { + return success; + } return reduction; } @@ -181,8 +187,10 @@ class SparseReduceOp : public OpKernel { OP_REQUIRES_OK(ctx, SparseTensor::Create( tensor::DeepCopy(*indices_t), tensor::DeepCopy(*values_t), shape, &sp)); - ReduceDetails reduction = SparseTensorReduceHelper( + absl::StatusOr<ReduceDetails> reduction_or = SparseTensorReduceHelper( sp, reduction_axes_t->flat<int32>(), keep_dims_); + OP_REQUIRES_OK(ctx, reduction_or.status()); + ReduceDetails reduction = *reduction_or; Tensor *out_values; OP_REQUIRES_OK( @@ -287,8 +295,10 @@ class SparseReduceSparseOp : public OpKernel { OP_REQUIRES_OK(ctx, SparseTensor::Create(tensor::DeepCopy(*indices_t), tensor::DeepCopy(*values_t), shape, &sp)); - ReduceDetails reduction = SparseTensorReduceHelper( + absl::StatusOr<ReduceDetails> reduction_or = SparseTensorReduceHelper( sp, reduction_axes_t->flat<int32>(), keep_dims_); + OP_REQUIRES_OK(ctx, reduction_or.status()); + ReduceDetails reduction = *reduction_or; sp.Reorder(reduction.reorder_dims); // Count nnzs in the output SparseTensor. From 71c1529afeb44d7a6428a7681b8c0294c67dd67b Mon Sep 17 00:00:00 2001 From: Kuy Mainwaring Date: Mon, 13 May 2024 12:53:59 -0700 Subject: [PATCH 047/478] [XLA:GPU] Clang-tidy fixes for xla/service/gpu/fusions/transpose_test.cc PiperOrigin-RevId: 633303182 --- third_party/xla/xla/service/gpu/fusions/transpose_test.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/third_party/xla/xla/service/gpu/fusions/transpose_test.cc b/third_party/xla/xla/service/gpu/fusions/transpose_test.cc index d7363bbd39f382..dbafd613d15b2b 100644 --- a/third_party/xla/xla/service/gpu/fusions/transpose_test.cc +++ b/third_party/xla/xla/service/gpu/fusions/transpose_test.cc @@ -34,8 +34,6 @@ namespace xla { namespace gpu { namespace { -using ::testing::HasSubstr; - class TransposeTest : public HloTestBase { protected: stream_executor::DeviceDescription device_info_ = From c945821b669c772f7ad2858eb11b27757685c535 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 13 May 2024 13:03:00 -0700 Subject: [PATCH 048/478] Integrate LLVM at llvm/llvm-project@1066eb554770 Updates LLVM usage to match [1066eb554770](https://github.com/llvm/llvm-project/commit/1066eb554770) PiperOrigin-RevId: 633306010 --- third_party/llvm/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/third_party/llvm/workspace.bzl b/third_party/llvm/workspace.bzl index cd453a40ec3bc0..d795d29b2a9434 100644 --- a/third_party/llvm/workspace.bzl +++ b/third_party/llvm/workspace.bzl @@ -4,8 +4,8 @@ load("//third_party:repo.bzl", "tf_http_archive") def repo(name): """Imports LLVM.""" - LLVM_COMMIT = "fc57f88f007497a4ead0ec8607ac66e1847b02d6" - LLVM_SHA256 = "0b66773795454d466ef4dcfae7cf38c8200ac4ee431e069ddf68313b3486b004" + LLVM_COMMIT = "1066eb55477044a3a92f3a40471375194dfcdbc8" + LLVM_SHA256 = "dc0fc82c184cb9ccd094f5ff821914d610d79529e82f0cbcb91ddedebff29a0a" tf_http_archive( name = name, From b0fd358c314a7b7a97546ceecd59041fbf1ee3b0 Mon Sep 17 00:00:00 2001 From: Kuy Mainwaring
Date: Mon, 13 May 2024 13:12:44 -0700 Subject: [PATCH 049/478] [XLA:GPU] Clang-tidy fixes for xla/service/gpu/fusions/tiling_util.h PiperOrigin-RevId: 633308984 --- third_party/xla/xla/service/gpu/fusions/tiling_util.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/third_party/xla/xla/service/gpu/fusions/tiling_util.cc b/third_party/xla/xla/service/gpu/fusions/tiling_util.cc index 24456209e521fb..83f9e5fcfb1da7 100644 --- a/third_party/xla/xla/service/gpu/fusions/tiling_util.cc +++ b/third_party/xla/xla/service/gpu/fusions/tiling_util.cc @@ -201,7 +201,7 @@ absl::StatusOr EmitThreadIdInfo(llvm::IRBuilder<>* builder, absl::StatusOr EmitTilingKernel( llvm::IRBuilder<>* builder, const Tiling& tiling, llvm::Type* index_ty, - const TileGenerator& tile_generator) { + const TileGenerator& tile_element_generator) { absl::Span dims_in_elems = tiling.GetShape(); const auto& block_counts = tiling.GetBlockCounts(); auto constant = [&](uint64_t c) -> llvm::Constant* { @@ -249,7 +249,7 @@ absl::StatusOr EmitTilingKernel( index_ty); }(); - tile_generator(thread_id_info, tile_offset, tile_dimensions); + tile_element_generator(thread_id_info, tile_offset, tile_dimensions); return {{tile_dimensions, tile_offset, thread_id_info}}; } From eac02b8bc7efcdd922d6095393b090f5c9f344d1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 May 2024 13:15:51 -0700 Subject: [PATCH 050/478] Migrate coord utils to use absl libraries directly + some clang fixes. PiperOrigin-RevId: 633310000 --- .../distributed_runtime/coordination/BUILD | 12 +- .../coordination_service_error_util.h | 4 +- .../coordination_service_rpc_handler.cc | 143 +++++++++--------- .../coordination_service_rpc_handler.h | 7 +- 4 files changed, 86 insertions(+), 80 deletions(-) diff --git a/third_party/xla/xla/tsl/distributed_runtime/coordination/BUILD b/third_party/xla/xla/tsl/distributed_runtime/coordination/BUILD index d7d5e7570970e5..33cf2f09025912 100644 --- a/third_party/xla/xla/tsl/distributed_runtime/coordination/BUILD +++ b/third_party/xla/xla/tsl/distributed_runtime/coordination/BUILD @@ -14,9 +14,9 @@ cc_library( name = "coordination_service_error_util", hdrs = ["coordination_service_error_util.h"], deps = [ + "@com_google_absl//absl/status", "@com_google_absl//absl/strings", - "@local_tsl//tsl/platform:errors", - "@local_tsl//tsl/platform:status", + "@com_google_absl//absl/strings:cord", "@local_tsl//tsl/protobuf:coordination_service_proto_cc", ], ) @@ -195,12 +195,14 @@ cc_library( ":coordination_service_agent", ":coordination_service_error_util", "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/synchronization", "@com_google_absl//absl/time", "@local_tsl//tsl/platform:casts", - "@local_tsl//tsl/platform:errors", - "@local_tsl//tsl/platform:mutex", + "@local_tsl//tsl/platform:protobuf", "@local_tsl//tsl/platform:status", - "@local_tsl//tsl/platform:statusor", "@local_tsl//tsl/platform:thread_annotations", "@local_tsl//tsl/protobuf:coordination_service_proto_cc", ], diff --git a/third_party/xla/xla/tsl/distributed_runtime/coordination/coordination_service_error_util.h b/third_party/xla/xla/tsl/distributed_runtime/coordination/coordination_service_error_util.h index a777d121c4ce93..4555a4e90e3a97 100644 --- a/third_party/xla/xla/tsl/distributed_runtime/coordination/coordination_service_error_util.h +++ 
b/third_party/xla/xla/tsl/distributed_runtime/coordination/coordination_service_error_util.h @@ -15,9 +15,9 @@ limitations under the License. #ifndef XLA_TSL_DISTRIBUTED_RUNTIME_COORDINATION_COORDINATION_SERVICE_ERROR_UTIL_H_ #define XLA_TSL_DISTRIBUTED_RUNTIME_COORDINATION_COORDINATION_SERVICE_ERROR_UTIL_H_ +#include "absl/status/status.h" +#include "absl/strings/cord.h" #include "absl/strings/string_view.h" -#include "tsl/platform/errors.h" -#include "tsl/platform/status.h" #include "tsl/protobuf/coordination_service.pb.h" namespace tsl { diff --git a/third_party/xla/xla/tsl/distributed_runtime/coordination/coordination_service_rpc_handler.cc b/third_party/xla/xla/tsl/distributed_runtime/coordination/coordination_service_rpc_handler.cc index ea15408886197c..920d7840486a9e 100644 --- a/third_party/xla/xla/tsl/distributed_runtime/coordination/coordination_service_rpc_handler.cc +++ b/third_party/xla/xla/tsl/distributed_runtime/coordination/coordination_service_rpc_handler.cc @@ -15,20 +15,23 @@ limitations under the License. #include "xla/tsl/distributed_runtime/coordination/coordination_service_rpc_handler.h" +#include #include #include #include #include #include "absl/algorithm/container.h" +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "absl/strings/str_cat.h" +#include "absl/synchronization/mutex.h" #include "absl/time/time.h" #include "xla/tsl/distributed_runtime/coordination/coordination_service.h" #include "xla/tsl/distributed_runtime/coordination/coordination_service_agent.h" #include "xla/tsl/distributed_runtime/coordination/coordination_service_error_util.h" -#include "tsl/platform/casts.h" -#include "tsl/platform/errors.h" -#include "tsl/platform/mutex.h" -#include "tsl/platform/statusor.h" +#include "tsl/platform/protobuf.h" +#include "tsl/platform/status.h" #include "tsl/protobuf/coordination_service.pb.h" namespace tsl { @@ -40,23 +43,23 @@ using tensorflow::KeyValueEntry; void CoordinationServiceRpcHandler::SetAgentInstance( CoordinationServiceAgent* agent) { - mutex_lock l(mu_); + absl::MutexLock l(&mu_); agent_ = agent; } void CoordinationServiceRpcHandler::SetServiceInstance( CoordinationServiceInterface* service) { - mutex_lock l(mu_); + absl::MutexLock l(&mu_); service_ = service; } void CoordinationServiceRpcHandler::RegisterTaskAsync( - const RegisterTaskRequest* request, RegisterTaskResponse* response, - StatusCallback done) { - tf_shared_lock l(mu_); + const tensorflow::RegisterTaskRequest* request, + tensorflow::RegisterTaskResponse* response, StatusCallback done) { + absl::ReaderMutexLock l(&mu_); if (service_ == nullptr) { done(MakeCoordinationError( - errors::Internal("Coordination service is not enabled."))); + absl::InternalError("Coordination service is not enabled."))); return; } const CoordinatedTask& task = request->source_task(); @@ -67,12 +70,12 @@ void CoordinationServiceRpcHandler::RegisterTaskAsync( } void CoordinationServiceRpcHandler::HeartbeatAsync( - const HeartbeatRequest* request, HeartbeatResponse* response, - StatusCallback done) { - tf_shared_lock l(mu_); + const tensorflow::HeartbeatRequest* request, + tensorflow::HeartbeatResponse* response, StatusCallback done) { + absl::ReaderMutexLock l(&mu_); if (service_ == nullptr) { done(MakeCoordinationError( - errors::Internal("Coordination service is not enabled."))); + absl::InternalError("Coordination service is not enabled."))); return; } const CoordinatedTask& task = request->source_task(); @@ -88,12 +91,12 @@ void CoordinationServiceRpcHandler::HeartbeatAsync( } 
void CoordinationServiceRpcHandler::WaitForAllTasksAsync( - const WaitForAllTasksRequest* request, WaitForAllTasksResponse* response, - StatusCallback done) { - tf_shared_lock l(mu_); + const tensorflow::WaitForAllTasksRequest* request, + tensorflow::WaitForAllTasksResponse* response, StatusCallback done) { + absl::ReaderMutexLock l(&mu_); if (service_ == nullptr) { done(MakeCoordinationError( - errors::Internal("Coordination service is not enabled."))); + absl::InternalError("Coordination service is not enabled."))); return; } service_->WaitForAllTasks( @@ -107,12 +110,12 @@ void CoordinationServiceRpcHandler::WaitForAllTasksAsync( } void CoordinationServiceRpcHandler::ShutdownTaskAsync( - const ShutdownTaskRequest* request, ShutdownTaskResponse* response, - StatusCallback done) { - tf_shared_lock l(mu_); + const tensorflow::ShutdownTaskRequest* request, + tensorflow::ShutdownTaskResponse* response, StatusCallback done) { + absl::ReaderMutexLock l(&mu_); if (service_ == nullptr) { done(MakeCoordinationError( - errors::Internal("Coordination service is not enabled."))); + absl::InternalError("Coordination service is not enabled."))); return; } service_->ShutdownTaskAsync(request->source_task(), @@ -120,30 +123,30 @@ void CoordinationServiceRpcHandler::ShutdownTaskAsync( } void CoordinationServiceRpcHandler::ResetTaskAsync( - const ResetTaskRequest* request, ResetTaskResponse* response, - StatusCallback done) { - tf_shared_lock l(mu_); + const tensorflow::ResetTaskRequest* request, + tensorflow::ResetTaskResponse* response, StatusCallback done) { + absl::ReaderMutexLock l(&mu_); if (service_ == nullptr) { done(MakeCoordinationError( - errors::Internal("Coordination service is not enabled."))); + absl::InternalError("Coordination service is not enabled."))); return; } done(service_->ResetTask(request->source_task())); } void CoordinationServiceRpcHandler::ReportErrorToTaskAsync( - const ReportErrorToTaskRequest* request, - ReportErrorToTaskResponse* response, StatusCallback done) { - tf_shared_lock l(mu_); + const tensorflow::ReportErrorToTaskRequest* request, + tensorflow::ReportErrorToTaskResponse* response, StatusCallback done) { + absl::ReaderMutexLock l(&mu_); if (agent_ == nullptr) { - done(MakeCoordinationError(errors::Internal( + done(MakeCoordinationError(absl::InternalError( "CoordinationServiceAgent is uninitialized or has already shutdown."))); return; } const CoordinationServiceError& error_payload = request->error_payload(); absl::Status error( static_cast(request->error_code()), - strings::StrCat( + absl::StrCat( "Error reported from /job:", error_payload.source_task().job_name(), "/task:", error_payload.source_task().task_id(), ": ", request->error_message())); @@ -153,12 +156,12 @@ void CoordinationServiceRpcHandler::ReportErrorToTaskAsync( } void CoordinationServiceRpcHandler::ReportErrorToServiceAsync( - const ReportErrorToServiceRequest* request, - ReportErrorToServiceResponse* response, StatusCallback done) { - tf_shared_lock l(mu_); + const tensorflow::ReportErrorToServiceRequest* request, + tensorflow::ReportErrorToServiceResponse* response, StatusCallback done) { + absl::ReaderMutexLock l(&mu_); if (service_ == nullptr) { done(MakeCoordinationError( - errors::Internal("Coordination service is not enabled."))); + absl::InternalError("Coordination service is not enabled."))); return; } done(service_->ReportTaskError( @@ -171,40 +174,40 @@ void CoordinationServiceRpcHandler::ReportErrorToServiceAsync( } void CoordinationServiceRpcHandler::GetTaskStateAsync( - const 
GetTaskStateRequest* request, GetTaskStateResponse* response, - StatusCallback done) { - tf_shared_lock l(mu_); + const tensorflow::GetTaskStateRequest* request, + tensorflow::GetTaskStateResponse* response, StatusCallback done) { + absl::ReaderMutexLock l(&mu_); if (service_ == nullptr) { done(MakeCoordinationError( - errors::Internal("Coordination service is not enabled."))); + absl::InternalError("Coordination service is not enabled."))); return; } auto result = service_->GetTaskState( {request->source_task().begin(), request->source_task().end()}); - absl::c_move(result, - RepeatedFieldBackInserter(response->mutable_task_state())); + absl::c_move(result, tsl::protobuf::RepeatedFieldBackInserter( + response->mutable_task_state())); done(absl::OkStatus()); } void CoordinationServiceRpcHandler::InsertKeyValueAsync( - const InsertKeyValueRequest* request, InsertKeyValueResponse* response, - StatusCallback done) { - tf_shared_lock l(mu_); + const tensorflow::InsertKeyValueRequest* request, + tensorflow::InsertKeyValueResponse* response, StatusCallback done) { + absl::ReaderMutexLock l(&mu_); if (service_ == nullptr) { done(MakeCoordinationError( - errors::Internal("Coordination service is not enabled."))); + absl::InternalError("Coordination service is not enabled."))); return; } done(service_->InsertKeyValue(request->kv().key(), request->kv().value())); } void CoordinationServiceRpcHandler::GetKeyValueAsync( - const GetKeyValueRequest* request, GetKeyValueResponse* response, - StatusCallback done) { - tf_shared_lock l(mu_); + const tensorflow::GetKeyValueRequest* request, + tensorflow::GetKeyValueResponse* response, StatusCallback done) { + absl::ReaderMutexLock l(&mu_); if (service_ == nullptr) { done(MakeCoordinationError( - errors::Internal("Coordination service is not enabled."))); + absl::InternalError("Coordination service is not enabled."))); return; } response->mutable_kv()->set_key(request->key()); @@ -219,12 +222,12 @@ void CoordinationServiceRpcHandler::GetKeyValueAsync( } void CoordinationServiceRpcHandler::TryGetKeyValueAsync( - const TryGetKeyValueRequest* request, TryGetKeyValueResponse* response, - StatusCallback done) { - tf_shared_lock l(mu_); + const tensorflow::TryGetKeyValueRequest* request, + tensorflow::TryGetKeyValueResponse* response, StatusCallback done) { + absl::ReaderMutexLock l(&mu_); if (service_ == nullptr) { done(MakeCoordinationError( - errors::Internal("Coordination service is not enabled."))); + absl::InternalError("Coordination service is not enabled."))); return; } auto result = service_->TryGetKeyValue(request->key()); @@ -238,12 +241,12 @@ void CoordinationServiceRpcHandler::TryGetKeyValueAsync( } void CoordinationServiceRpcHandler::GetKeyValueDirAsync( - const GetKeyValueDirRequest* request, GetKeyValueDirResponse* response, - StatusCallback done) { - tf_shared_lock l(mu_); + const tensorflow::GetKeyValueDirRequest* request, + tensorflow::GetKeyValueDirResponse* response, StatusCallback done) { + absl::ReaderMutexLock l(&mu_); if (service_ == nullptr) { done(MakeCoordinationError( - errors::Internal("Coordination service is not enabled."))); + absl::InternalError("Coordination service is not enabled."))); return; } std::vector results = @@ -254,24 +257,24 @@ void CoordinationServiceRpcHandler::GetKeyValueDirAsync( } void CoordinationServiceRpcHandler::DeleteKeyValueAsync( - const DeleteKeyValueRequest* request, DeleteKeyValueResponse* response, - StatusCallback done) { - tf_shared_lock l(mu_); + const tensorflow::DeleteKeyValueRequest* request, + 
tensorflow::DeleteKeyValueResponse* response, StatusCallback done) { + absl::ReaderMutexLock l(&mu_); if (service_ == nullptr) { done(MakeCoordinationError( - errors::Internal("Coordination service is not enabled."))); + absl::InternalError("Coordination service is not enabled."))); return; } done(service_->DeleteKeyValue(request->key())); } -void CoordinationServiceRpcHandler::BarrierAsync(const BarrierRequest* request, - BarrierResponse* response, - StatusCallback done) { - tf_shared_lock l(mu_); +void CoordinationServiceRpcHandler::BarrierAsync( + const tensorflow::BarrierRequest* request, + tensorflow::BarrierResponse* response, StatusCallback done) { + absl::ReaderMutexLock l(&mu_); if (service_ == nullptr) { done(MakeCoordinationError( - errors::Internal("Coordination service is not enabled."))); + absl::InternalError("Coordination service is not enabled."))); return; } std::vector tasks = {request->tasks().begin(), @@ -284,12 +287,12 @@ void CoordinationServiceRpcHandler::BarrierAsync(const BarrierRequest* request, } void CoordinationServiceRpcHandler::CancelBarrierAsync( - const CancelBarrierRequest* request, CancelBarrierResponse* response, - StatusCallback done) { - tf_shared_lock l(mu_); + const tensorflow::CancelBarrierRequest* request, + tensorflow::CancelBarrierResponse* response, StatusCallback done) { + absl::ReaderMutexLock l(&mu_); if (service_ == nullptr) { done(MakeCoordinationError( - errors::Internal("Coordination service is not enabled."))); + absl::InternalError("Coordination service is not enabled."))); return; } done(service_->CancelBarrier(request->barrier_id(), request->source_task())); diff --git a/third_party/xla/xla/tsl/distributed_runtime/coordination/coordination_service_rpc_handler.h b/third_party/xla/xla/tsl/distributed_runtime/coordination/coordination_service_rpc_handler.h index 2895467f6e89d6..51d4f9f6901dc6 100644 --- a/third_party/xla/xla/tsl/distributed_runtime/coordination/coordination_service_rpc_handler.h +++ b/third_party/xla/xla/tsl/distributed_runtime/coordination/coordination_service_rpc_handler.h @@ -16,9 +16,10 @@ limitations under the License. #ifndef XLA_TSL_DISTRIBUTED_RUNTIME_COORDINATION_COORDINATION_SERVICE_RPC_HANDLER_H_ #define XLA_TSL_DISTRIBUTED_RUNTIME_COORDINATION_COORDINATION_SERVICE_RPC_HANDLER_H_ +#include "absl/status/status.h" +#include "absl/synchronization/mutex.h" #include "xla/tsl/distributed_runtime/coordination/coordination_service.h" #include "xla/tsl/distributed_runtime/coordination/coordination_service_agent.h" -#include "tsl/platform/mutex.h" #include "tsl/platform/status.h" #include "tsl/platform/thread_annotations.h" #include "tsl/protobuf/coordination_service.pb.h" @@ -26,7 +27,7 @@ limitations under the License. namespace tsl { class CoordinationServiceRpcHandler { public: - explicit CoordinationServiceRpcHandler() {} + explicit CoordinationServiceRpcHandler() = default; void SetAgentInstance(CoordinationServiceAgent* agent); @@ -92,7 +93,7 @@ class CoordinationServiceRpcHandler { StatusCallback done); private: - mutex mu_; + absl::Mutex mu_; CoordinationServiceAgent* agent_ TF_GUARDED_BY(mu_) = nullptr; CoordinationServiceInterface* service_ TF_GUARDED_BY(mu_) = nullptr; }; From 362bb373366c2a377e719cedf9867845aee807f5 Mon Sep 17 00:00:00 2001 From: Dimitris Vardoulakis Date: Mon, 13 May 2024 13:17:38 -0700 Subject: [PATCH 051/478] PR #12049: Detect when we are running on an NVIDIA simulator and check that we pull the device description from gpu_specs. 
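The simulator is recognized by the nonsensical device description it reports: StreamExecutor returns device_memory_size() == -1 there, so the hard-coded description from gpu_specs must be used instead. A condensed sketch of the check this patch adds (error text abbreviated; see the full diff below):

  Compiler::TargetConfig target_config{executor};
  if (target_config.device_description.device_memory_size() == -1) {
    // Simulated device: the StreamExecutor-derived config is inaccurate,
    // so require an explicit config file instead.
    return absl::FailedPreconditionError(
        "Pass target information via --xla_gpu_target_config_filename.");
  }
  return target_config;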
Imported from GitHub PR https://github.com/openxla/xla/pull/12049 This helps when testing the compiler on new hardware platforms. The simulator may have nonsensical values for a few fields in the device description (such as device_memory_size == -1), so we want to use a hard-coded device description that contains the correct values. Copybara import of the project: -- 1a488e45b3a7d0f7729fde9c28915c40188d17b3 by Dimitris Vardoulakis : On Nvidia simulation, fail if target config hasn't been supplied using --xla_gpu_target_config_filename. Merging this change closes #12049 PiperOrigin-RevId: 633310642 --- third_party/xla/xla/service/gpu/gpu_compiler.cc | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/third_party/xla/xla/service/gpu/gpu_compiler.cc b/third_party/xla/xla/service/gpu/gpu_compiler.cc index 297eb7b98fdb71..35f6bf612f7ee6 100644 --- a/third_party/xla/xla/service/gpu/gpu_compiler.cc +++ b/third_party/xla/xla/service/gpu/gpu_compiler.cc @@ -1533,7 +1533,20 @@ absl::Status GpuCompiler::OptimizeHloPostLayoutAssignment( return Compiler::TargetConfig{gpu_target_config_proto}; } if (executor) { - return Compiler::TargetConfig{executor}; + Compiler::TargetConfig target_config = Compiler::TargetConfig{executor}; + int64_t device_memory_size = + target_config.device_description.device_memory_size(); + // Checking for device_memory_size == -1 is how we detect that we are + // running on Nvidia's software simulator. When running on simulation, + // the config from StreamExecutor is inaccurate, so we must load the + // hard-coded config from a file. + if (device_memory_size == -1) { + return absl::FailedPreconditionError( + "When running on an NVIDIA simulation device, you must use " + "--xla_gpu_target_config_filename to pass in target information. 
" + "The target config from StreamExecutor is inaccurate."); + } + return target_config; } return absl::InternalError( "Either GPU has to be attached, or --xla_gpu_target_config_filename " From 30784c22ee80465fb7eff793c79d81ce800bc2b3 Mon Sep 17 00:00:00 2001 From: Kuy Mainwaring Date: Mon, 13 May 2024 13:18:29 -0700 Subject: [PATCH 052/478] [XLA:GPU] Clang-tidy fixes for xla/service/gpu/fusions/mlir/lower_tensors.cc PiperOrigin-RevId: 633310993 --- third_party/xla/xla/service/gpu/fusions/mlir/lower_tensors.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/third_party/xla/xla/service/gpu/fusions/mlir/lower_tensors.cc b/third_party/xla/xla/service/gpu/fusions/mlir/lower_tensors.cc index 5677b45e5342bf..43bc759b067221 100644 --- a/third_party/xla/xla/service/gpu/fusions/mlir/lower_tensors.cc +++ b/third_party/xla/xla/service/gpu/fusions/mlir/lower_tensors.cc @@ -78,7 +78,6 @@ using mlir::TypedValue; using mlir::TypeRange; using mlir::Value; using mlir::ValueRange; -using mlir::arith::AtomicRMWKind; namespace arith = ::mlir::arith; namespace scf = ::mlir::scf; From f9b3c69fb1dc02ec06962edf1ce660fe6efbb6e7 Mon Sep 17 00:00:00 2001 From: Kuy Mainwaring Date: Mon, 13 May 2024 13:18:39 -0700 Subject: [PATCH 053/478] [XLA:GPU] Clang-tidy fixes for xla/service/gpu/fusions/mlir/convert_xla_gpu_pure_call_ops.cc PiperOrigin-RevId: 633311058 --- .../service/gpu/fusions/mlir/convert_xla_gpu_pure_call_ops.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/third_party/xla/xla/service/gpu/fusions/mlir/convert_xla_gpu_pure_call_ops.cc b/third_party/xla/xla/service/gpu/fusions/mlir/convert_xla_gpu_pure_call_ops.cc index 181005130b6a56..7f06615b91a9d2 100644 --- a/third_party/xla/xla/service/gpu/fusions/mlir/convert_xla_gpu_pure_call_ops.cc +++ b/third_party/xla/xla/service/gpu/fusions/mlir/convert_xla_gpu_pure_call_ops.cc @@ -10,6 +10,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ #include +#include #include "mlir/Dialect/Func/IR/FuncOps.h" // from @llvm-project #include "mlir/IR/PatternMatch.h" // from @llvm-project From 329241d3241de9291d07e574668080f4ff61d890 Mon Sep 17 00:00:00 2001 From: Kuy Mainwaring Date: Mon, 13 May 2024 13:19:45 -0700 Subject: [PATCH 054/478] [XLA:GPU] Clang-tidy fixes for xla/service/gpu/fusions/triton.cc PiperOrigin-RevId: 633311473 --- third_party/xla/xla/service/gpu/fusions/triton.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/third_party/xla/xla/service/gpu/fusions/triton.cc b/third_party/xla/xla/service/gpu/fusions/triton.cc index ebbaccdb0bd742..30a98194e04200 100644 --- a/third_party/xla/xla/service/gpu/fusions/triton.cc +++ b/third_party/xla/xla/service/gpu/fusions/triton.cc @@ -18,6 +18,7 @@ limitations under the License. #include #include #include +#include #include "absl/log/check.h" #include "absl/log/log.h" From a8fe248d93b5608bc1ed1a8fda14c60b131b7371 Mon Sep 17 00:00:00 2001 From: Kyle Lucke Date: Mon, 13 May 2024 13:29:47 -0700 Subject: [PATCH 055/478] Eliminate CommandBuffer::Create method in favor of the CreateCommandBuffer on StreamExecutorInterface. This is a step of reducing circular dependencies between command_buffer.h and stream_executor_interface.h. 
PiperOrigin-RevId: 633314618 --- .../gpu/runtime/command_buffer_cmd_test.cc | 9 +++-- .../gpu/runtime/command_buffer_thunk.cc | 4 +- .../xla/xla/stream_executor/command_buffer.cc | 7 +--- .../xla/xla/stream_executor/command_buffer.h | 8 ---- .../gpu/gpu_command_buffer_test.cc | 38 +++++++++---------- 5 files changed, 29 insertions(+), 37 deletions(-) diff --git a/third_party/xla/xla/service/gpu/runtime/command_buffer_cmd_test.cc b/third_party/xla/xla/service/gpu/runtime/command_buffer_cmd_test.cc index 7e1fa0d0cfd0c3..f96f4fa5d9e246 100644 --- a/third_party/xla/xla/service/gpu/runtime/command_buffer_cmd_test.cc +++ b/third_party/xla/xla/service/gpu/runtime/command_buffer_cmd_test.cc @@ -214,7 +214,8 @@ TEST(CommandBufferCmdTest, MemcpyCmd) { CommandBufferCmd::RecordParams record_params = {state}; - auto command_buffer = se::CommandBuffer::Create(executor).value(); + auto command_buffer = + executor->CreateCommandBuffer(se::CommandBuffer::Mode::kPrimary).value(); TF_ASSERT_OK(commands.Record(params, record_params, command_buffer.get())); // Execute command buffer and verify that it copied the memory. @@ -282,7 +283,8 @@ TEST(CommandBufferCmdTest, BarrierCmd) { CommandBufferCmd::RecordParams record_params = {state}; - auto command_buffer = se::CommandBuffer::Create(executor).value(); + auto command_buffer = + executor->CreateCommandBuffer(se::CommandBuffer::Mode::kPrimary).value(); TF_ASSERT_OK(commands.Record(params, record_params, command_buffer.get())); // Execute command buffer and verify that it copied the memory. @@ -359,7 +361,8 @@ TEST(CommandBufferCmdTest, LaunchCmd) { CommandBufferCmd::RecordParams record_params = {state}; - auto command_buffer = se::CommandBuffer::Create(executor).value(); + auto command_buffer = + executor->CreateCommandBuffer(se::CommandBuffer::Mode::kPrimary).value(); TF_ASSERT_OK(commands.Record(params, record_params, command_buffer.get())); // Execute command buffer and verify that it copied the memory. diff --git a/third_party/xla/xla/service/gpu/runtime/command_buffer_thunk.cc b/third_party/xla/xla/service/gpu/runtime/command_buffer_thunk.cc index 4e74fdcd821c8b..8236ad2a65cb08 100644 --- a/third_party/xla/xla/service/gpu/runtime/command_buffer_thunk.cc +++ b/third_party/xla/xla/service/gpu/runtime/command_buffer_thunk.cc @@ -274,7 +274,9 @@ CommandBufferThunk::GetOrCreateCommandBuffer(se::StreamExecutor* executor) { } // Create a new empty command buffer. - TF_ASSIGN_OR_RETURN(auto command_buffer, se::CommandBuffer::Create(executor)); + TF_ASSIGN_OR_RETURN( + auto command_buffer, + executor->CreateCommandBuffer(se::CommandBuffer::Mode::kPrimary)); auto emplaced = state_->command_buffers.emplace( executor, std::make_shared(std::move(command_buffer))); diff --git a/third_party/xla/xla/stream_executor/command_buffer.cc b/third_party/xla/xla/stream_executor/command_buffer.cc index 1c5be3d8e0d80a..cd28a135bf519c 100644 --- a/third_party/xla/xla/stream_executor/command_buffer.cc +++ b/third_party/xla/xla/stream_executor/command_buffer.cc @@ -29,11 +29,6 @@ limitations under the License. namespace stream_executor { -absl::StatusOr> CommandBuffer::Create( - StreamExecutorInterface* executor, Mode mode) { - return executor->CreateCommandBuffer(mode); -} - absl::StatusOr> CommandBuffer::Trace( StreamExecutorInterface* executor, absl::AnyInvocable function, Mode mode) { @@ -50,7 +45,7 @@ absl::StatusOr> CommandBuffer::Trace( // Prepare an empty command buffer instance. 
TF_ASSIGN_OR_RETURN(std::unique_ptr command_buffer, - CommandBuffer::Create(executor, mode)); + executor->CreateCommandBuffer(mode)); // Trace and finalize the command buffer. TF_RETURN_IF_ERROR( diff --git a/third_party/xla/xla/stream_executor/command_buffer.h b/third_party/xla/xla/stream_executor/command_buffer.h index 1237493a2fb55e..a91963d46a0880 100644 --- a/third_party/xla/xla/stream_executor/command_buffer.h +++ b/third_party/xla/xla/stream_executor/command_buffer.h @@ -165,14 +165,6 @@ class CommandBuffer { // Command buffer constructors //===--------------------------------------------------------------------===// - // TODO(b/323534971): Command buffer constructors should be moved to - // StreamExecutor or a dedicated CommandBufferFactory accessible via - // StreamExecutor. - - // Creates a new empty command buffer on the given executor. - static absl::StatusOr> Create( - StreamExecutorInterface* executor, Mode mode = Mode::kPrimary); - // Creates a new command buffer on the given executor by tracing `function` // invocation. All StreamExecutor operations on a Stream argument will be // recorded into the command buffer. Returned command buffer is finalized, and diff --git a/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer_test.cc b/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer_test.cc index 4440393cc3bb8d..bd50ac29429b0e 100644 --- a/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer_test.cc +++ b/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer_test.cc @@ -125,7 +125,7 @@ TEST(GpuCommandBufferTest, LaunchSingleKernel) { TF_ASSERT_OK(stream->MemZero(&c, byte_length)); // Create a command buffer with a single kernel launch. - auto cmd_buffer = CommandBuffer::Create(executor).value(); + auto cmd_buffer = executor->CreateCommandBuffer(primary).value(); TF_ASSERT_OK(cmd_buffer->Launch(add, ThreadDim(), BlockDim(4), a, b, c)); TF_ASSERT_OK(cmd_buffer->Finalize()); @@ -239,8 +239,8 @@ TEST(GpuCommandBufferTest, LaunchNestedCommandBuffer) { TF_ASSERT_OK(stream->MemZero(&c, byte_length)); // Create a command buffer with a single kernel launch. - auto primary_cmd = CommandBuffer::Create(executor).value(); - auto nested_cmd = CommandBuffer::Create(executor, nested).value(); + auto primary_cmd = executor->CreateCommandBuffer(primary).value(); + auto nested_cmd = executor->CreateCommandBuffer(nested).value(); TF_ASSERT_OK(nested_cmd->Launch(add, ThreadDim(), BlockDim(4), a, b, c)); TF_ASSERT_OK(primary_cmd->AddNestedCommandBuffer(*nested_cmd)); TF_ASSERT_OK(primary_cmd->Finalize()); @@ -260,7 +260,7 @@ TEST(GpuCommandBufferTest, LaunchNestedCommandBuffer) { // Update command buffer to write into `d` buffer by creating a new nested // command buffer. - nested_cmd = CommandBuffer::Create(executor, nested).value(); + nested_cmd = executor->CreateCommandBuffer(nested).value(); TF_ASSERT_OK(nested_cmd->Launch(add, ThreadDim(), BlockDim(4), a, b, d)); TF_ASSERT_OK(primary_cmd->Update()); TF_ASSERT_OK(primary_cmd->AddNestedCommandBuffer(*nested_cmd)); @@ -290,7 +290,7 @@ TEST(GpuCommandBufferTest, MemcpyDeviceToDevice) { TF_ASSERT_OK(stream->Memset32(&a, 42, byte_length)); // Create a command buffer with a single a to b memcpy command. 
- auto cmd_buffer = CommandBuffer::Create(executor).value(); + auto cmd_buffer = executor->CreateCommandBuffer(primary).value(); TF_ASSERT_OK(cmd_buffer->MemcpyDeviceToDevice(&b, a, byte_length)); TF_ASSERT_OK(cmd_buffer->Finalize()); @@ -331,7 +331,7 @@ TEST(GpuCommandBufferTest, Memset) { DeviceMemory a = executor->AllocateArray(length, 0); // Create a command buffer with a single memset command. - auto cmd_buffer = CommandBuffer::Create(executor).value(); + auto cmd_buffer = executor->CreateCommandBuffer(primary).value(); TF_ASSERT_OK(cmd_buffer->Memset(&a, uint32_t{42}, length)); TF_ASSERT_OK(cmd_buffer->Finalize()); @@ -402,7 +402,7 @@ TEST(GpuCommandBufferTest, Barriers) { }; // Create a command buffer with a DAG of memset commands. - auto cmd_buffer = CommandBuffer::Create(executor).value(); + auto cmd_buffer = executor->CreateCommandBuffer(primary).value(); TF_ASSERT_OK(record(cmd_buffer.get(), 42)); TF_ASSERT_OK(executor->Submit(stream.get(), *cmd_buffer)); @@ -482,7 +482,7 @@ TEST(GpuCommandBufferTest, IndependentExecutionScopes) { }; // Create a command buffer with a DAG of memset commands. - auto cmd_buffer = CommandBuffer::Create(executor).value(); + auto cmd_buffer = executor->CreateCommandBuffer(primary).value(); TF_ASSERT_OK(record(cmd_buffer.get(), 42)); TF_ASSERT_OK(executor->Submit(stream.get(), *cmd_buffer)); @@ -556,7 +556,7 @@ TEST(GpuCommandBufferTest, ExecutionScopeBarriers) { }; // Create a command buffer with a DAG of memset commands. - auto cmd_buffer = CommandBuffer::Create(executor).value(); + auto cmd_buffer = executor->CreateCommandBuffer(primary).value(); TF_ASSERT_OK(record(cmd_buffer.get(), 42)); TF_ASSERT_OK(executor->Submit(stream.get(), *cmd_buffer)); @@ -646,7 +646,7 @@ TEST(GpuCommandBufferTest, ExecutionScopeOneDirectionalBarriers) { }; // Create a command buffer with a DAG of memset commands. - auto cmd_buffer = CommandBuffer::Create(executor).value(); + auto cmd_buffer = executor->CreateCommandBuffer(primary).value(); TF_ASSERT_OK(record(cmd_buffer.get(), 42)); TF_ASSERT_OK(executor->Submit(stream.get(), *cmd_buffer)); @@ -720,7 +720,7 @@ TEST(GpuCommandBufferTest, ConditionalIf) { }; // Create a command buffer with a single conditional operation. - auto cmd_buffer = CommandBuffer::Create(executor).value(); + auto cmd_buffer = executor->CreateCommandBuffer(primary).value(); TF_ASSERT_OK(cmd_buffer->If(pred, then_builder)); TF_ASSERT_OK(cmd_buffer->Finalize()); @@ -817,7 +817,7 @@ TEST(GpuCommandBufferTest, ConditionalIfElse) { }; // Create a command buffer with a single conditional operation. - auto cmd_buffer = CommandBuffer::Create(executor).value(); + auto cmd_buffer = executor->CreateCommandBuffer(primary).value(); TF_ASSERT_OK(cmd_buffer->IfElse(pred, then_builder, else_builder)); TF_ASSERT_OK(cmd_buffer->Finalize()); @@ -912,7 +912,7 @@ TEST(GpuCommandBufferTest, ConditionalCase) { }; // Create a command buffer with a single conditional operation. - auto cmd_buffer = CommandBuffer::Create(executor).value(); + auto cmd_buffer = executor->CreateCommandBuffer(primary).value(); TF_ASSERT_OK(cmd_buffer->Case(index, {branch0, branch1})); TF_ASSERT_OK(cmd_buffer->Finalize()); @@ -991,7 +991,7 @@ TEST(GpuCommandBufferTest, ConditionalFor) { int32_t num_iters = 10; // Create a command buffer with a single conditional operation. 
- auto cmd_buffer = CommandBuffer::Create(executor).value(); + auto cmd_buffer = executor->CreateCommandBuffer(primary).value(); TF_ASSERT_OK(cmd_buffer->For(num_iters, loop_counter, body_builder)); TF_ASSERT_OK(cmd_buffer->Finalize()); @@ -1058,7 +1058,7 @@ TEST(GpuCommandBufferTest, ConditionalWhile) { }; // Create a command buffer with a single conditional operation. - auto cmd_buffer = CommandBuffer::Create(executor).value(); + auto cmd_buffer = executor->CreateCommandBuffer(primary).value(); TF_ASSERT_OK(cmd_buffer->While(pred, cond_builder, body_builder)); TF_ASSERT_OK(cmd_buffer->Finalize()); @@ -1125,7 +1125,7 @@ TEST(GpuCommandBufferTest, ConditionalIfInExecutionScope) { }; // Create a command buffer with a DAG of memset commands. - auto cmd_buffer = CommandBuffer::Create(executor).value(); + auto cmd_buffer = executor->CreateCommandBuffer(primary).value(); TF_ASSERT_OK(record(cmd_buffer.get(), 42)); TF_ASSERT_OK(executor->Submit(stream.get(), *cmd_buffer)); @@ -1226,7 +1226,7 @@ TEST(GpuCommandBufferTest, ConditionalWhileInExecutionScope) { }; // Create a command buffer with a single conditional operation. - auto cmd_buffer = CommandBuffer::Create(executor).value(); + auto cmd_buffer = executor->CreateCommandBuffer(primary).value(); TF_ASSERT_OK(record(cmd_buffer.get(), 42, 10)); TF_ASSERT_OK(executor->Submit(stream.get(), *cmd_buffer)); @@ -1290,7 +1290,7 @@ static void BM_CreateCommandBuffer(benchmark::State& state) { DeviceMemory b = executor->AllocateArray(1, 0); for (auto s : state) { - auto cmd_buffer = CommandBuffer::Create(executor, nested).value(); + auto cmd_buffer = executor->CreateCommandBuffer(nested).value(); for (int i = 1; i < state.range(0); ++i) { CHECK_OK(cmd_buffer->Launch(add, ThreadDim(), BlockDim(4), b, b, b)); } @@ -1336,7 +1336,7 @@ static void BM_UpdateCommandBuffer(benchmark::State& state) { DeviceMemory b = executor->AllocateArray(1, 0); - auto cmd_buffer = CommandBuffer::Create(executor, primary).value(); + auto cmd_buffer = executor->CreateCommandBuffer(primary).value(); for (int i = 1; i < state.range(0); ++i) { CHECK_OK(cmd_buffer->Launch(add, ThreadDim(), BlockDim(4), b, b, b)); } From 3cf76f38d220c81a03b71bf464232e908093004f Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Mon, 13 May 2024 13:39:34 -0700 Subject: [PATCH 056/478] [xla:cpu] Fix msan warnings from arguments coming from jit compiled function PiperOrigin-RevId: 633317729 --- third_party/xla/xla/service/cpu/BUILD | 1 + .../xla/xla/service/cpu/runtime_handle_ffi_call.cc | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/third_party/xla/xla/service/cpu/BUILD b/third_party/xla/xla/service/cpu/BUILD index 9d115f91eb2092..125af6dbce9170 100644 --- a/third_party/xla/xla/service/cpu/BUILD +++ b/third_party/xla/xla/service/cpu/BUILD @@ -1189,6 +1189,7 @@ cc_library( "//xla/service:custom_call_status_public_headers", "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/base:dynamic_annotations", "@com_google_absl//absl/log", "@com_google_absl//absl/status", "@com_google_absl//absl/strings", diff --git a/third_party/xla/xla/service/cpu/runtime_handle_ffi_call.cc b/third_party/xla/xla/service/cpu/runtime_handle_ffi_call.cc index d0e26cc5cfc232..7a7963593f592a 100644 --- a/third_party/xla/xla/service/cpu/runtime_handle_ffi_call.cc +++ b/third_party/xla/xla/service/cpu/runtime_handle_ffi_call.cc @@ -22,8 +22,10 @@ limitations under the License. 
#include "absl/algorithm/container.h" #include "absl/base/attributes.h" +#include "absl/base/dynamic_annotations.h" #include "absl/status/status.h" #include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" #include "absl/types/span.h" #include "llvm/ADT/TypeSwitch.h" #include "mlir/AsmParser/AsmParser.h" // from @llvm-project @@ -134,7 +136,12 @@ absl::StatusOr BuildAttributesMap(mlir::DictionaryAttr dict) { } absl::Span DecodeDims(int64_t* encoded_dims_data) { + // Annotate memory coming from jit compiled function as initialized to + // suppress false positives from msan sanitizer. + ABSL_ANNOTATE_MEMORY_IS_INITIALIZED(encoded_dims_data, sizeof(int64_t)); auto dims_count = encoded_dims_data[0]; + ABSL_ANNOTATE_MEMORY_IS_INITIALIZED(encoded_dims_data, + dims_count * sizeof(int64_t)); auto dims_begin = encoded_dims_data + 1; return absl::MakeSpan(dims_begin, dims_begin + dims_count); } @@ -263,6 +270,13 @@ ABSL_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_HandleFfiCall( outputs = reinterpret_cast(output); } + // Annotate memory coming from jit compiled function as initialized to + // suppress false positives from msan sanitizer. + ABSL_ANNOTATE_MEMORY_IS_INITIALIZED(result_types, + result_count * sizeof(int32_t)); + ABSL_ANNOTATE_MEMORY_IS_INITIALIZED(operand_types, + operand_count * sizeof(int32_t)); + absl::Status status = BuildAndCallFfi( target_name, backend_config, absl::MakeSpan(outputs, result_count), absl::MakeSpan(inputs, operand_count), From a24156129530420b7e97d873f8d80be6158ab6d8 Mon Sep 17 00:00:00 2001 From: Kuy Mainwaring Date: Mon, 13 May 2024 13:42:45 -0700 Subject: [PATCH 057/478] [XLA:GPU] Clang-tidy fixes for xla/service/gpu/runtime/address_computation_thunk.h PiperOrigin-RevId: 633318572 --- .../xla/xla/service/gpu/runtime/address_computation_thunk.h | 1 + 1 file changed, 1 insertion(+) diff --git a/third_party/xla/xla/service/gpu/runtime/address_computation_thunk.h b/third_party/xla/xla/service/gpu/runtime/address_computation_thunk.h index 8d36751b9d830d..dffe9c5da1d8f6 100644 --- a/third_party/xla/xla/service/gpu/runtime/address_computation_thunk.h +++ b/third_party/xla/xla/service/gpu/runtime/address_computation_thunk.h @@ -18,6 +18,7 @@ limitations under the License. #include #include +#include #include #include "absl/base/thread_annotations.h" From dbcbb3eef6278cee0e614926f64fbb795308198f Mon Sep 17 00:00:00 2001 From: Ionel Gog Date: Mon, 13 May 2024 13:45:54 -0700 Subject: [PATCH 058/478] Allow ifrt.Call of functions that consists only of ifrt.Reshard ops. PiperOrigin-RevId: 633319317 --- third_party/xla/xla/python/ifrt/ir/constants.h | 6 ++++++ third_party/xla/xla/python/ifrt/ir/ifrt_interfaces.cc | 9 ++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/third_party/xla/xla/python/ifrt/ir/constants.h b/third_party/xla/xla/python/ifrt/ir/constants.h index cd1bc06bf85ee0..27e9d11fb6a1cf 100644 --- a/third_party/xla/xla/python/ifrt/ir/constants.h +++ b/third_party/xla/xla/python/ifrt/ir/constants.h @@ -25,6 +25,12 @@ namespace ifrt { // apart from atom program FuncOps (callee of `ifrt.Call`). inline constexpr llvm::StringLiteral kIfrtFunctionAttrName = "ifrt.function"; +// Name of UnitAttr on FuncOp to indicate it's an IFRT IR function that +// only reshards arrays. While functions with kIfrtFunctionAttrName attribute +// cannot be `ifrt.Call`ed, kIfrtReshardFunctionAttrName can be called. 
+inline constexpr llvm::StringLiteral kIfrtReshardFunctionAttrName = + "ifrt.reshard_function"; + // Name of UnitAttr on arguments of FuncOp to indicate a donated input. // Must be used in a FuncOp with `ifrt.function` attr. inline constexpr llvm::StringLiteral kIfrtDonatedArgAttrName = "ifrt.donated"; diff --git a/third_party/xla/xla/python/ifrt/ir/ifrt_interfaces.cc b/third_party/xla/xla/python/ifrt/ir/ifrt_interfaces.cc index a079f75fbdd597..db8844da11e36b 100644 --- a/third_party/xla/xla/python/ifrt/ir/ifrt_interfaces.cc +++ b/third_party/xla/xla/python/ifrt/ir/ifrt_interfaces.cc @@ -37,9 +37,12 @@ namespace impl { LogicalResult verifyNestedInIfrtFunc(Operation* op) { auto func_op = op->getParentOfType(); if (func_op != nullptr && - !func_op->hasAttr(::xla::ifrt::kIfrtFunctionAttrName)) { - return op->emitOpError() << "must be in a FuncOp with attr `" - << ::xla::ifrt::kIfrtFunctionAttrName << "`"; + !func_op->hasAttr(::xla::ifrt::kIfrtFunctionAttrName) && + !func_op->hasAttr(::xla::ifrt::kIfrtReshardFunctionAttrName)) { + return op->emitOpError() + << "must be in a FuncOp with attr `" + << ::xla::ifrt::kIfrtFunctionAttrName << "` or atttr `" + << ::xla::ifrt::kIfrtReshardFunctionAttrName << "`"; } return success(); } From 11beb9d544e7b18449a958e4ba877346d697db21 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 May 2024 13:49:17 -0700 Subject: [PATCH 059/478] Update ops-related pbtxt files. PiperOrigin-RevId: 633320442 --- tensorflow/core/ops/ops.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 53c362ed0ab819..c8c55e08ca0d32 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -1,4 +1,4 @@ -go/debugstr +go/debugonly op { name: "Abort" attr { From 696e6819516e973ee2e0ab7be439d1a747b55d32 Mon Sep 17 00:00:00 2001 From: Blake Hechtman Date: Mon, 13 May 2024 13:55:14 -0700 Subject: [PATCH 060/478] [XLA:SPACE_TO_BATCH] correctly propagate on dot PiperOrigin-RevId: 633322160 --- .../xla/service/space_to_batch_converter.cc | 101 +++++++++++++++++- .../service/space_to_batch_converter_test.cc | 13 +-- 2 files changed, 104 insertions(+), 10 deletions(-) diff --git a/third_party/xla/xla/service/space_to_batch_converter.cc b/third_party/xla/xla/service/space_to_batch_converter.cc index 88cbb5ecad505e..a6149b197805ec 100644 --- a/third_party/xla/xla/service/space_to_batch_converter.cc +++ b/third_party/xla/xla/service/space_to_batch_converter.cc @@ -16,6 +16,7 @@ limitations under the License. #include #include +#include #include #include #include @@ -112,6 +113,8 @@ class ConvolutionVisitor { // This function checks if the HLO instruction supports propagation. bool SupportedOpForPropagation(HloInstruction* consumer, HloInstruction* producer); + bool SupportedDotForPropagation(HloInstruction* consumer, + HloInstruction* producer); // Method that checks validity of Broadcast propagation. 
bool IsBroadcastPropagatable(HloInstruction* broadcast, @@ -1561,13 +1564,55 @@ bool ConvolutionVisitor::IsOpcodeNonPropagatable(HloInstruction* consumer) { switch (consumer->opcode()) { case HloOpcode::kCustomCall: return true; - case HloOpcode::kDot: - return !ctrl_.enable_propagations_on_dots; default: return false; } } +bool ConvolutionVisitor::SupportedDotForPropagation(HloInstruction* consumer, + HloInstruction* producer) { + if (consumer->opcode() != HloOpcode::kDot) { + return false; + } + auto operand = consumer->mutable_operand(0); + if (operand != producer || !instr_to_dim_map_.contains(operand)) { + return false; + } + const auto& dnums = consumer->dot_dimension_numbers(); + const auto& contracting_dims = dnums.lhs_contracting_dimensions(); + const auto& batch_dims = dnums.lhs_batch_dimensions(); + auto result = instr_to_dim_map_[operand]; + const int64_t old_batch_dim = result[DimMapper(SpaceToBatchDimMap::kBatch)]; + const int64_t old_space_dim = result[DimMapper(SpaceToBatchDimMap::kSpace0)]; + const int64_t old_feature_dim = + result[DimMapper(SpaceToBatchDimMap::kFeature)]; + // No feature dimension in output + if (consumer->operand(1)->shape().rank() == + batch_dims.size() + contracting_dims.size()) { + return false; + } + // If the convolution space or batch dimension are contracting or batch on + // the dot, do not propagate. + bool found = false; + for (auto dim : batch_dims) { + if (dim == old_batch_dim || dim == old_space_dim) { + return false; + } + if (dim == old_feature_dim) { + found = true; + } + } + if (!found) { + return false; + } + for (auto dim : contracting_dims) { + if (dim == old_batch_dim || dim == old_space_dim) { + return false; + } + } + return true; +} + bool ConvolutionVisitor::SupportedOpForPropagation(HloInstruction* consumer, HloInstruction* producer) { if (IsOpcodeNonPropagatable(consumer)) { @@ -1682,6 +1727,10 @@ bool ConvolutionVisitor::SupportedOpForPropagation(HloInstruction* consumer, return true; } + if (SupportedDotForPropagation(consumer, producer)) { + return true; + } + if (consumer->opcode() == HloOpcode::kReduce) { // Support only the trivial case where both batch and split spatial dim are // being reduced @@ -1964,6 +2013,50 @@ absl::StatusOr ConvolutionVisitor::Propagate(HloInstruction* consumer, return true; } + if (consumer->opcode() == HloOpcode::kDot) { + auto dim_map_val = instr_to_dim_map_[producer]; + const int64_t old_batch_dim = + dim_map_val[DimMapper(SpaceToBatchDimMap::kBatch)]; + const int64_t old_space_dim = + dim_map_val[DimMapper(SpaceToBatchDimMap::kSpace0)]; + int64_t new_batch_dim = -1; + int64_t new_space_dim = -1; + int64_t outer = 0; + for (int64_t i = 0; i < producer->shape().rank(); ++i) { + if (absl::c_linear_search( + consumer->dot_dimension_numbers().lhs_batch_dimensions(), i) || + absl::c_linear_search( + consumer->dot_dimension_numbers().lhs_contracting_dimensions(), + i)) { + continue; + } + if (i == old_batch_dim) { + new_batch_dim = + outer + + consumer->dot_dimension_numbers().lhs_batch_dimensions_size(); + } + if (i == old_space_dim) { + new_batch_dim = + outer + + consumer->dot_dimension_numbers().lhs_batch_dimensions_size(); + } + ++outer; + } + std::vector dim_map(NumMappedDims()); + dim_map[DimMapper(SpaceToBatchDimMap::kBatch)] = new_batch_dim; + dim_map[DimMapper(SpaceToBatchDimMap::kSpace0)] = new_space_dim; + dim_map[DimMapper(SpaceToBatchDimMap::kFeature)] = + consumer->shape().rank() - 1; + instr_to_dim_map_[consumer] = dim_map; + auto new_consumer = 
+    new_consumer->mutable_shape()->mutable_dimensions()[new_batch_dim] =
+        producer->shape().dimensions(old_batch_dim);
+    new_consumer->mutable_shape()->mutable_dimensions()[new_space_dim] =
+        producer->shape().dimensions(old_space_dim);
+    old_to_new_instrs_[consumer] = new_consumer;
+    return true;
+  }
+
   // TODO(b/189500737) : Consider a common way of propagation for
   // slice/pad/reduce-window.
   if (consumer->opcode() == HloOpcode::kPad) {
@@ -3619,7 +3712,8 @@ ConvolutionVisitor::DoesConvolutionFeedReduceWindowOrSelectAndScatter(
     // Stop the search if these ops are encountered.
     if (user->opcode() == HloOpcode::kConvolution ||
         user->opcode() == HloOpcode::kPad ||
-        user->opcode() == HloOpcode::kTranspose) {
+        user->opcode() == HloOpcode::kTranspose ||
+        user->opcode() == HloOpcode::kDot) {
       continue;
     }
     auto ret =
@@ -3986,7 +4080,6 @@ Status ConvolutionVisitor::PerformSpaceToBatchOnConvolution(
   }
 
   TF_CHECK_OK(PropagateOnUsers(original_conv));
-
   return OkStatus();
 }
 
diff --git a/third_party/xla/xla/service/space_to_batch_converter_test.cc b/third_party/xla/xla/service/space_to_batch_converter_test.cc
index cf8dce26211b83..e2ed3314bc4f6f 100644
--- a/third_party/xla/xla/service/space_to_batch_converter_test.cc
+++ b/third_party/xla/xla/service/space_to_batch_converter_test.cc
@@ -247,7 +247,7 @@ ENTRY computation {
   EXPECT_GT(previous_reshape->operand(0)->shape().dimensions(batch_dim), 4);
 }
 
-TEST_F(SpaceToBatchConverterTest, NoPropagateThroughDot) {
+TEST_F(SpaceToBatchConverterTest, PropagateThroughDot) {
   std::string hlo_string = R"(
   HloModule module
 
@@ -256,9 +256,10 @@ TEST_F(SpaceToBatchConverterTest, NoPropagateThroughDot) {
     %p1 = bf16[3,3,32,32] parameter(1)
     %convolution = bf16[1,256,256,32] convolution(%p0, %p1), window={size=3x3},
       dim_labels=b01f_01io->b01f
-    %p2 = bf16[1,256,256,32] parameter(2)
-    ROOT %dot.5010 = bf16[1,256,32,32] dot(%convolution, %p2), lhs_batch_dims={0,1},
-      lhs_contracting_dims={2}, rhs_batch_dims={0,2}, rhs_contracting_dims={1}
+    %p2 = bf16[32,32] parameter(2)
+    ROOT %dot.5010 = bf16[1,256,256,32] dot(%convolution, %p2),
+      lhs_contracting_dims={3},
+      rhs_contracting_dims={0}
   }
   )";
 
@@ -267,8 +268,8 @@ TEST_F(SpaceToBatchConverterTest, NoPropagateThroughDot) {
   SpaceToBatchConverter converter(
       SpaceToBatchController{true, true, true, true, 8});
 
-  // Test that we do not start space-to-batch on conv->dot chains
-  ASSERT_FALSE(converter.Run(module.get()).value());
+  // Test that we now propagate space-to-batch through conv->dot chains.
+  ASSERT_TRUE(converter.Run(module.get()).value());
 }
 
 }  // namespace

From 4beabf4430b0950890f31298012edd8aaaba0510 Mon Sep 17 00:00:00 2001
From: Mason Chang
Date: Mon, 13 May 2024 13:55:49 -0700
Subject: [PATCH 061/478] Update comment to reflect placement

PiperOrigin-RevId: 633322446
---
 tensorflow/core/common_runtime/placer.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/common_runtime/placer.cc b/tensorflow/core/common_runtime/placer.cc
index 5908b26a898690..2d03de9f2434bc 100644
--- a/tensorflow/core/common_runtime/placer.cc
+++ b/tensorflow/core/common_runtime/placer.cc
@@ -291,8 +291,8 @@ Status Placer::Run(const GraphOptimizationPassOptions& options) {
     // to perform good placement we can add an interface for this.
int assigned_device = -1; - // Heuristic B: If the node only operates on metadata, not data, - // then it is desirable to place that metadata node with its + // Heuristic B: If the node only operates on metadata (not data) or is + // an identity node, then it is desirable to place that node with its // input. if (IsMetadata(node) || MatchIdentityOperation(node)) { // Make sure that the input device type is in the list of supported From 3bd8e82f856430f30276020f2342c1e797534db4 Mon Sep 17 00:00:00 2001 From: Sania Nagpal Date: Mon, 13 May 2024 14:13:17 -0700 Subject: [PATCH 062/478] No public description PiperOrigin-RevId: 633328239 --- tensorflow/core/tfrt/runtime/BUILD | 3 + tensorflow/core/tfrt/runtime/stream_test.cc | 87 ++++++++++++++++++--- 2 files changed, 77 insertions(+), 13 deletions(-) diff --git a/tensorflow/core/tfrt/runtime/BUILD b/tensorflow/core/tfrt/runtime/BUILD index 95712a19629fd8..c4cf2dda41b87e 100644 --- a/tensorflow/core/tfrt/runtime/BUILD +++ b/tensorflow/core/tfrt/runtime/BUILD @@ -153,6 +153,7 @@ tf_cc_shared_test( srcs = ["stream_test.cc"], tags = ["no_oss"], deps = [ + ":step_id", ":stream", "//tensorflow/core/framework:tensor", "//tensorflow/core/framework:tensor_testutil", @@ -162,6 +163,8 @@ tf_cc_shared_test( "@com_google_absl//absl/log", "@com_google_absl//absl/log:check", "@com_google_absl//absl/memory", + "@com_google_absl//absl/status", + "@com_google_absl//absl/strings:string_view", "@com_google_absl//absl/time", "@com_google_googletest//:gtest_main", "@local_tsl//tsl/platform:env", diff --git a/tensorflow/core/tfrt/runtime/stream_test.cc b/tensorflow/core/tfrt/runtime/stream_test.cc index cac9113053bfab..bcb8a14a553675 100644 --- a/tensorflow/core/tfrt/runtime/stream_test.cc +++ b/tensorflow/core/tfrt/runtime/stream_test.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/core/tfrt/runtime/stream.h" #include +#include #include #include #include @@ -26,10 +27,13 @@ limitations under the License. 
#include "absl/log/check.h" #include "absl/log/log.h" #include "absl/memory/memory.h" +#include "absl/status/status.h" +#include "absl/strings/string_view.h" #include "absl/time/clock.h" #include "absl/time/time.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/tfrt/runtime/step_id.h" #include "tensorflow/core/tfrt/saved_model/saved_model_testutil.h" #include "tensorflow/core/tfrt/utils/thread_pool.h" #include "tsl/platform/env.h" @@ -44,27 +48,17 @@ using ::testing::AnyOf; using ::testing::ElementsAreArray; using ::testing::Pair; using ::testing::UnorderedElementsAre; - -class TestStreamInterface : public StreamControllerInterface { - public: - TestStreamInterface() : StreamControllerInterface("test_address") {} -}; - -const bool kUnused = []() { - GetGlobalStreamInterfaceFactory().RegisterController( - []() { return std::make_unique(); }); - return true; -}(); +using ::testing::status::StatusIs; TEST(StreamTest, Simple) { StreamCallbackId callback_id(1234); StepId step_id(5678); std::vector> outputs; - + ScopedStreamCallback scoped_stream_callback; { TF_ASSERT_OK_AND_ASSIGN( - auto scoped_stream_callback, + scoped_stream_callback, GetGlobalStreamCallbackRegistry().Register( "test_model", callback_id, step_id, [&](absl::flat_hash_map arg) { @@ -91,6 +85,16 @@ TEST(StreamTest, Simple) { ElementsAreArray({200})); EXPECT_THAT(GetTfTensorData(outputs[1]["c"]), ElementsAreArray({300})); + + ScopedStreamCallback scoped_stream_callback_copy; + scoped_stream_callback_copy = std::move(scoped_stream_callback); + + auto status = GetGlobalStreamCallbackRegistry().Register( + "test_model", callback_id, step_id, + [&](absl::flat_hash_map arg) { + outputs.push_back(std::move(arg)); + }); + EXPECT_THAT(status, StatusIs(absl::StatusCode::kAlreadyExists)); } TEST(StreamTest, MultipleWriters) { @@ -142,6 +146,63 @@ TEST(StreamTest, MultipleWriters) { } } +class TestStreamControllerInterface : public StreamControllerInterface { + public: + TestStreamControllerInterface() + : StreamControllerInterface("test_controller_address") {} +}; + +TEST(StreamControllerInterface, Initialize) { + GetGlobalStreamInterfaceFactory().RegisterController( + []() { return std::make_unique(); }); + TF_ASSERT_OK_AND_ASSIGN( + auto controller_interface, + GetGlobalStreamInterfaceFactory().CreateControllerStreamInterface()); + EXPECT_EQ(controller_interface->controller_address(), + "test_controller_address"); +} + +class TestStreamWorkerInterface : public StreamWorkerInterface { + public: + explicit TestStreamWorkerInterface(std::string worker_address) + : StreamWorkerInterface(worker_address) {} + absl::Status InvokeStreamCallback( + const StreamCallbackId& callback_id, + const std::vector& names, + const std::vector>>& + responses) override { + return absl::OkStatus(); + } +}; + +TEST(StreamWorkerInterface, Initialize) { + GetGlobalStreamInterfaceFactory().RegisterWorker( + [](absl::string_view address) + -> absl::StatusOr> { + return std::make_unique( + "test_worker_address"); + }); + TF_ASSERT_OK_AND_ASSIGN( + auto worker_interface, + GetGlobalStreamInterfaceFactory().CreateWorkerStreamInterface()( + "test_worker_address")); + EXPECT_EQ(worker_interface->controller_address(), "test_worker_address"); +} + +TEST(StepId, Generate) { + StepId step_id(1234); + EXPECT_EQ(step_id.id, 1234); + StepIdGenerator step_id_generator; + EXPECT_EQ(step_id_generator.GetNextStepId(), StepId(1)); + EXPECT_EQ(step_id_generator.GetNextStepId(), StepId(2)); + 
EXPECT_EQ(step_id_generator.GetNextStepId(), StepId(3)); +} + +TEST(StepId, GlobalInitial) { + EXPECT_EQ(GetGlobalInitialStepId(), 0); + TEST_ScopedInitialStepId test_id(127); + EXPECT_EQ(GetGlobalInitialStepId(), 127); +} } // namespace } // namespace tfrt_stub } // namespace tensorflow From cad3f2972e55027886d2e121e4d27a1b6ba60f97 Mon Sep 17 00:00:00 2001 From: Yifan Jiang Date: Mon, 13 May 2024 14:18:03 -0700 Subject: [PATCH 063/478] Fix a potential memory corruption in PJRT GPU client. Make sure the staging buffer outlives the memcpys to and from the staging buffer. PiperOrigin-RevId: 633329702 --- third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_client.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_client.cc b/third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_client.cc index 4e818b7f6fddd5..bfa11cb315c8c5 100644 --- a/third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_client.cc +++ b/third_party/xla/xla/pjrt/gpu/se_gpu_pjrt_client.cc @@ -393,8 +393,8 @@ class AsyncHostToDeviceTransferManager event.value()); auto cleanup = [this, buffer_index, event = std::move(event).value(), - stream, is_last_transfer, - on_done = std::move(on_done)]() mutable { + stream, is_last_transfer, on_done = std::move(on_done), + staging_buffer = std::move(staging_buffer)]() mutable { CleanUp(buffer_index, std::move(event), stream, is_last_transfer, std::move(on_done)); }; From b3a856c2c25fe0e7d6df49b058b1a1bf31de820d Mon Sep 17 00:00:00 2001 From: Junwhan Ahn Date: Mon, 13 May 2024 14:36:43 -0700 Subject: [PATCH 064/478] Enable JAX memory tests for GPUs and CPUs PjRt GPU and CPU has recently gotten memory space support with just one memory space per device, so enabling relevant JAX memory tests. Most tests cannot be enabled yet because they rely on `unpinned_host`, so only enabling `ShardingMemoriesTest` for now. PiperOrigin-RevId: 633335638 --- third_party/xla/xla/python/xla_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/xla/xla/python/xla_client.py b/third_party/xla/xla/python/xla_client.py index 37c462efc3e0f6..af0590c12c0f8a 100644 --- a/third_party/xla/xla/python/xla_client.py +++ b/third_party/xla/xla/python/xla_client.py @@ -49,7 +49,7 @@ # Just an internal arbitrary increasing number to help with backward-compatible # changes. In JAX, reference this via jax._src.lib.xla_extension_version. -_version = 264 +_version = 265 # Version number for MLIR:Python components. mlir_api_version = 56 From 9a97b727bcb6536663878404f06bb71e7cc4113b Mon Sep 17 00:00:00 2001 From: Swachhand Lokhande Date: Mon, 13 May 2024 14:41:53 -0700 Subject: [PATCH 065/478] Add TFExecutorGraphPruningPass to TF dialect to executor export pipeline This is to prune out unused ops/nodes in the graph. This doesn't affect correctness but in some cases will drastically reduce memory/network usage and improve performance depending on the number and type of dead nodes that are pruned. 
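
As a rough sketch (illustrative only, not part of the change itself), the
pruning pass slots into an MLIR pass pipeline as below; the pass-creation
function is the one added in this change, but the standalone PassManager
setup and the `context`/`module` variables are hypothetical:

    // Hypothetical driver; the actual change appends the pass to the
    // existing OpPassManager built in tf_dialect_to_executor.cc.
    mlir::PassManager pm(&context);
    pm.addNestedPass<mlir::func::FuncOp>(
        mlir::tf_executor::CreateTFExecutorGraphPruningPass());
    if (mlir::failed(pm.run(module))) {
      // Pruning failed; surface the pass diagnostics to the caller.
    }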
PiperOrigin-RevId: 633337115 --- tensorflow/compiler/mlir/tf2xla/api/v2/BUILD | 4 +- .../api/v2/testdata/func_with_dead_ops.mlir | 62 +++++++++++++++++++ .../tf2xla/api/v2/tf_dialect_to_executor.cc | 2 + .../api/v2/tf_dialect_to_executor_test.cc | 32 +++++++++- 4 files changed, 97 insertions(+), 3 deletions(-) create mode 100644 tensorflow/compiler/mlir/tf2xla/api/v2/testdata/func_with_dead_ops.mlir diff --git a/tensorflow/compiler/mlir/tf2xla/api/v2/BUILD b/tensorflow/compiler/mlir/tf2xla/api/v2/BUILD index 545203ad20ea23..709a63bea84ebe 100644 --- a/tensorflow/compiler/mlir/tf2xla/api/v2/BUILD +++ b/tensorflow/compiler/mlir/tf2xla/api/v2/BUILD @@ -210,6 +210,7 @@ tf_cc_test( srcs = ["tf_dialect_to_executor_test.cc"], data = [ "testdata/empty_func.mlir", + "testdata/func_with_dead_ops.mlir", "testdata/invalid_executor.mlir", ], deps = [ @@ -220,10 +221,9 @@ tf_cc_test( "@com_google_absl//absl/status", "@com_google_absl//absl/strings", "@com_google_googletest//:gtest_main", + "@llvm-project//llvm:Support", "@llvm-project//mlir:IR", "@llvm-project//mlir:Parser", "@local_tsl//tsl/lib/core:status_test_util", - "@local_tsl//tsl/lib/monitoring:test_utils", - "@local_tsl//tsl/platform:status", ], ) diff --git a/tensorflow/compiler/mlir/tf2xla/api/v2/testdata/func_with_dead_ops.mlir b/tensorflow/compiler/mlir/tf2xla/api/v2/testdata/func_with_dead_ops.mlir new file mode 100644 index 00000000000000..f8dd51f4e12d3c --- /dev/null +++ b/tensorflow/compiler/mlir/tf2xla/api/v2/testdata/func_with_dead_ops.mlir @@ -0,0 +1,62 @@ +module attributes {tf.devices = {"/job:tpu_host_worker/replica:0/task:0/device:CPU:0", "/job:tpu_host_worker/replica:0/task:0/device:TPU:0", "/job:tpu_host_worker/replica:0/task:0/device:TPU:1", "/job:tpu_host_worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:tpu_host_worker/replica:0/task:1/device:CPU:0", "/job:tpu_host_worker/replica:0/task:1/device:TPU:0", "/job:tpu_host_worker/replica:0/task:1/device:TPU:1", "/job:tpu_host_worker/replica:0/task:1/device:TPU_SYSTEM:0", "/job:tpu_host_worker/replica:0/task:2/device:CPU:0", "/job:tpu_host_worker/replica:0/task:2/device:TPU:0", "/job:tpu_host_worker/replica:0/task:2/device:TPU:1", "/job:tpu_host_worker/replica:0/task:2/device:TPU_SYSTEM:0", "/job:tpu_host_worker/replica:0/task:3/device:CPU:0", "/job:tpu_host_worker/replica:0/task:3/device:TPU:0", "/job:tpu_host_worker/replica:0/task:3/device:TPU:1", "/job:tpu_host_worker/replica:0/task:3/device:TPU_SYSTEM:0"}, tf.versions = {bad_consumers = [], min_consumer = 0 : i32, producer = 1847 : i32}} { + func.func @main(%arg0: tensor {tf._user_specified_name = "steps", tf.device = "/job:tpu_host_worker/replica:0/task:0/device:CPU:0"}, %arg1: tensor<*x!tf_type.resource>> {tf._user_specified_name = "899", tf.device = "/job:tpu_host_worker/replica:0/task:0/device:CPU:0"}, %arg2: tensor<*x!tf_type.resource>> {tf._user_specified_name = "901", tf.device = "/job:tpu_host_worker/replica:0/task:0/device:CPU:0"}, %arg3: tensor<*x!tf_type.resource>> {tf._user_specified_name = "903", tf.device = "/job:tpu_host_worker/replica:0/task:0/device:CPU:0"}, %arg4: tensor<*x!tf_type.resource>> {tf._user_specified_name = "905", tf.device = "/job:tpu_host_worker/replica:0/task:0/device:CPU:0"}, %arg5: tensor<*x!tf_type.resource>> {tf._user_specified_name = "907", tf.device = "/job:tpu_host_worker/replica:0/task:0/device:CPU:0"}, %arg6: tensor<*x!tf_type.resource>> {tf._user_specified_name = "909", tf.device = "/job:tpu_host_worker/replica:0/task:0/device:CPU:0"}, %arg7: tensor<*x!tf_type.resource>> 
{tf._user_specified_name = "911", tf.device = "/job:tpu_host_worker/replica:0/task:0/device:CPU:0"}, %arg8: tensor<*x!tf_type.resource>> {tf._user_specified_name = "913", tf.device = "/job:tpu_host_worker/replica:0/task:1/device:CPU:0"}, %arg9: tensor<*x!tf_type.resource>> {tf._user_specified_name = "915", tf.device = "/job:tpu_host_worker/replica:0/task:2/device:CPU:0"}, %arg10: tensor<*x!tf_type.resource>> {tf._user_specified_name = "917", tf.device = "/job:tpu_host_worker/replica:0/task:3/device:CPU:0"}, %arg11: tensor<*x!tf_type.resource>> {tf._user_specified_name = "919", tf.device = "/job:tpu_host_worker/replica:0/task:0/device:CPU:0"}, %arg12: tensor<*x!tf_type.resource>> {tf._user_specified_name = "921", tf.device = "/job:tpu_host_worker/replica:0/task:1/device:CPU:0"}, %arg13: tensor<*x!tf_type.resource>> {tf._user_specified_name = "923", tf.device = "/job:tpu_host_worker/replica:0/task:2/device:CPU:0"}, %arg14: tensor<*x!tf_type.resource>> {tf._user_specified_name = "925", tf.device = "/job:tpu_host_worker/replica:0/task:3/device:CPU:0"}, %arg15: tensor<*x!tf_type.resource>> {tf._user_specified_name = "927", tf.device = "/job:tpu_host_worker/replica:0/task:0/device:CPU:0"}, %arg16: tensor<*x!tf_type.resource>> {tf._user_specified_name = "929", tf.device = "/job:tpu_host_worker/replica:0/task:1/device:CPU:0"}, %arg17: tensor<*x!tf_type.resource>> {tf._user_specified_name = "931", tf.device = "/job:tpu_host_worker/replica:0/task:2/device:CPU:0"}, %arg18: tensor<*x!tf_type.resource>> {tf._user_specified_name = "933", tf.device = "/job:tpu_host_worker/replica:0/task:3/device:CPU:0"}, %arg19: tensor<*x!tf_type.resource>> {tf._user_specified_name = "935", tf.device = "/job:tpu_host_worker/replica:0/task:0/device:CPU:0"}, %arg20: tensor<*x!tf_type.resource>> {tf._user_specified_name = "937", tf.device = "/job:tpu_host_worker/replica:0/task:0/device:CPU:0"}, %arg21: tensor<*x!tf_type.resource>> {tf._user_specified_name = "939", tf.device = "/job:tpu_host_worker/replica:0/task:0/device:CPU:0"}) -> tensor attributes {allow_soft_placement = false, tf.entry_function = {control_outputs = "", inputs = "steps,unknown,unknown_0,unknown_1,unknown_2,unknown_3,unknown_4,unknown_5,unknown_6,unknown_7,unknown_8,unknown_9,unknown_10,unknown_11,unknown_12,unknown_13,unknown_14,unknown_15,unknown_16,unknown_17,unknown_18,unknown_19", outputs = "statefulpartitionedcall_RetVal"}} { + %0 = "tf.ReadVariableOp"(%arg19) : (tensor<*x!tf_type.resource>>) -> tensor<128x1024xf32> + %1 = "tf.ReadVariableOp"(%arg1) : (tensor<*x!tf_type.resource>>) -> tensor + %2 = "tf.ReadVariableOp"(%arg2) : (tensor<*x!tf_type.resource>>) -> tensor + %3 = "tf.ReadVariableOp"(%arg4) : (tensor<*x!tf_type.resource>>) -> tensor<1024xf32> + %4 = "tf.ReadVariableOp"(%arg3) : (tensor<*x!tf_type.resource>>) -> tensor<128x1024xf32> + %5 = "tf.ReadVariableOp"(%arg5) : (tensor<*x!tf_type.resource>>) -> tensor<1024x1xf32> + %6 = "tf.ReadVariableOp"(%arg20) : (tensor<*x!tf_type.resource>>) -> tensor<1024xf32> + %7 = "tf.ReadVariableOp"(%arg21) : (tensor<*x!tf_type.resource>>) -> tensor<1024x1xf32> + %8 = "tf.ReadVariableOp"(%arg6) : (tensor<*x!tf_type.resource>>) -> tensor + %9 = "tf.Const"() <{value = dense<"test"> : tensor<3x!tf_type.string>}> : () -> tensor<3x!tf_type.string> + %cst = "tf.Const"() <{value = dense<0> : tensor}> : () -> tensor + %11:4 = "tf.Split"(%cst, %0) {num_split = 4 : i32} : (tensor, tensor<128x1024xf32>) -> (tensor<32x1024xf32>, tensor<32x1024xf32>, tensor<32x1024xf32>, tensor<32x1024xf32>) + %cst_0 = "tf.Const"() 
<{value = dense<0> : tensor}> : () -> tensor + %12:4 = "tf.Split"(%cst_0, %4) {num_split = 4 : i32} : (tensor, tensor<128x1024xf32>) -> (tensor<32x1024xf32>, tensor<32x1024xf32>, tensor<32x1024xf32>, tensor<32x1024xf32>) + %cst_1 = "tf.Const"() <{value = dense<0> : tensor}> : () -> tensor + %cst_2 = "tf.Const"() <{value = dense<0> : tensor}> : () -> tensor + %13:20 = tf_device.replicate {devices = {TPU_REPLICATED_CORE_0 = ["/job:tpu_host_worker/replica:0/task:0/device:TPU:0", "/job:tpu_host_worker/replica:0/task:2/device:TPU:0"], TPU_REPLICATED_CORE_1 = ["/job:tpu_host_worker/replica:0/task:0/device:TPU:1", "/job:tpu_host_worker/replica:0/task:2/device:TPU:1"], TPU_REPLICATED_CORE_2 = ["/job:tpu_host_worker/replica:0/task:1/device:TPU:0", "/job:tpu_host_worker/replica:0/task:3/device:TPU:0"], TPU_REPLICATED_CORE_3 = ["/job:tpu_host_worker/replica:0/task:1/device:TPU:1", "/job:tpu_host_worker/replica:0/task:3/device:TPU:1"], TPU_REPLICATED_HOST_0 = ["/job:tpu_host_worker/replica:0/task:0/device:CPU:0", "/job:tpu_host_worker/replica:0/task:2/device:CPU:0"], TPU_REPLICATED_HOST_1 = ["/job:tpu_host_worker/replica:0/task:0/device:CPU:0", "/job:tpu_host_worker/replica:0/task:2/device:CPU:0"], TPU_REPLICATED_HOST_2 = ["/job:tpu_host_worker/replica:0/task:1/device:CPU:0", "/job:tpu_host_worker/replica:0/task:3/device:CPU:0"], TPU_REPLICATED_HOST_3 = ["/job:tpu_host_worker/replica:0/task:1/device:CPU:0", "/job:tpu_host_worker/replica:0/task:3/device:CPU:0"]}, n = 2 : i32} { + %16:40 = "tf_device.parallel_execute"() ({ + %19:10 = "tf_device.launch"() <{device = "TPU_REPLICATED_CORE_0"}> ({ + %20:10 = "tf.TPUExecute"(%arg0, %11#0, %1, %2, %3, %12#0, %5, %6, %7, %8, %9) : (tensor, tensor<32x1024xf32>, tensor, tensor, tensor<1024xf32>, tensor<32x1024xf32>, tensor<1024x1xf32>, tensor<1024xf32>, tensor<1024x1xf32>, tensor, tensor<3x!tf_type.string>) -> (tensor, tensor<32x1024xf32>, tensor, tensor, tensor<1024xf32>, tensor<32x1024xf32>, tensor<1024x1xf32>, tensor<1024xf32>, tensor<1024x1xf32>, tensor) + tf_device.return %20#0, %20#1, %20#2, %20#3, %20#4, %20#5, %20#6, %20#7, %20#8, %20#9 : tensor, tensor<32x1024xf32>, tensor, tensor, tensor<1024xf32>, tensor<32x1024xf32>, tensor<1024x1xf32>, tensor<1024xf32>, tensor<1024x1xf32>, tensor + }) : () -> (tensor, tensor<32x1024xf32>, tensor, tensor, tensor<1024xf32>, tensor<32x1024xf32>, tensor<1024x1xf32>, tensor<1024xf32>, tensor<1024x1xf32>, tensor) + tf_device.return %19#0, %19#1, %19#2, %19#3, %19#4, %19#5, %19#6, %19#7, %19#8, %19#9 : tensor, tensor<32x1024xf32>, tensor, tensor, tensor<1024xf32>, tensor<32x1024xf32>, tensor<1024x1xf32>, tensor<1024xf32>, tensor<1024x1xf32>, tensor + }, { + %19:10 = "tf_device.launch"() <{device = "TPU_REPLICATED_CORE_1"}> ({ + %20:10 = "tf.TPUExecute"(%arg0, %11#1, %1, %2, %3, %12#1, %5, %6, %7, %8, %9) : (tensor, tensor<32x1024xf32>, tensor, tensor, tensor<1024xf32>, tensor<32x1024xf32>, tensor<1024x1xf32>, tensor<1024xf32>, tensor<1024x1xf32>, tensor, tensor<3x!tf_type.string>) -> (tensor, tensor<32x1024xf32>, tensor, tensor, tensor<1024xf32>, tensor<32x1024xf32>, tensor<1024x1xf32>, tensor<1024xf32>, tensor<1024x1xf32>, tensor) + tf_device.return %20#0, %20#1, %20#2, %20#3, %20#4, %20#5, %20#6, %20#7, %20#8, %20#9 : tensor, tensor<32x1024xf32>, tensor, tensor, tensor<1024xf32>, tensor<32x1024xf32>, tensor<1024x1xf32>, tensor<1024xf32>, tensor<1024x1xf32>, tensor + }) : () -> (tensor, tensor<32x1024xf32>, tensor, tensor, tensor<1024xf32>, tensor<32x1024xf32>, tensor<1024x1xf32>, tensor<1024xf32>, tensor<1024x1xf32>, 
tensor) + tf_device.return %19#0, %19#1, %19#2, %19#3, %19#4, %19#5, %19#6, %19#7, %19#8, %19#9 : tensor, tensor<32x1024xf32>, tensor, tensor, tensor<1024xf32>, tensor<32x1024xf32>, tensor<1024x1xf32>, tensor<1024xf32>, tensor<1024x1xf32>, tensor + }, { + %19:10 = "tf_device.launch"() <{device = "TPU_REPLICATED_CORE_2"}> ({ + %20:10 = "tf.TPUExecute"(%arg0, %11#2, %1, %2, %3, %12#2, %5, %6, %7, %8, %9) : (tensor, tensor<32x1024xf32>, tensor, tensor, tensor<1024xf32>, tensor<32x1024xf32>, tensor<1024x1xf32>, tensor<1024xf32>, tensor<1024x1xf32>, tensor, tensor<3x!tf_type.string>) -> (tensor, tensor<32x1024xf32>, tensor, tensor, tensor<1024xf32>, tensor<32x1024xf32>, tensor<1024x1xf32>, tensor<1024xf32>, tensor<1024x1xf32>, tensor) + tf_device.return %20#0, %20#1, %20#2, %20#3, %20#4, %20#5, %20#6, %20#7, %20#8, %20#9 : tensor, tensor<32x1024xf32>, tensor, tensor, tensor<1024xf32>, tensor<32x1024xf32>, tensor<1024x1xf32>, tensor<1024xf32>, tensor<1024x1xf32>, tensor + }) : () -> (tensor, tensor<32x1024xf32>, tensor, tensor, tensor<1024xf32>, tensor<32x1024xf32>, tensor<1024x1xf32>, tensor<1024xf32>, tensor<1024x1xf32>, tensor) + tf_device.return %19#0, %19#1, %19#2, %19#3, %19#4, %19#5, %19#6, %19#7, %19#8, %19#9 : tensor, tensor<32x1024xf32>, tensor, tensor, tensor<1024xf32>, tensor<32x1024xf32>, tensor<1024x1xf32>, tensor<1024xf32>, tensor<1024x1xf32>, tensor + }, { + %19:10 = "tf_device.launch"() <{device = "TPU_REPLICATED_CORE_3"}> ({ + %20:10 = "tf.TPUExecute"(%arg0, %11#3, %1, %2, %3, %12#3, %5, %6, %7, %8, %9) : (tensor, tensor<32x1024xf32>, tensor, tensor, tensor<1024xf32>, tensor<32x1024xf32>, tensor<1024x1xf32>, tensor<1024xf32>, tensor<1024x1xf32>, tensor, tensor<3x!tf_type.string>) -> (tensor, tensor<32x1024xf32>, tensor, tensor, tensor<1024xf32>, tensor<32x1024xf32>, tensor<1024x1xf32>, tensor<1024xf32>, tensor<1024x1xf32>, tensor) + tf_device.return %20#0, %20#1, %20#2, %20#3, %20#4, %20#5, %20#6, %20#7, %20#8, %20#9 : tensor, tensor<32x1024xf32>, tensor, tensor, tensor<1024xf32>, tensor<32x1024xf32>, tensor<1024x1xf32>, tensor<1024xf32>, tensor<1024x1xf32>, tensor + }) : () -> (tensor, tensor<32x1024xf32>, tensor, tensor, tensor<1024xf32>, tensor<32x1024xf32>, tensor<1024x1xf32>, tensor<1024xf32>, tensor<1024x1xf32>, tensor) + tf_device.return %19#0, %19#1, %19#2, %19#3, %19#4, %19#5, %19#6, %19#7, %19#8, %19#9 : tensor, tensor<32x1024xf32>, tensor, tensor, tensor<1024xf32>, tensor<32x1024xf32>, tensor<1024x1xf32>, tensor<1024xf32>, tensor<1024x1xf32>, tensor + }) : () -> (tensor, tensor<32x1024xf32>, tensor, tensor, tensor<1024xf32>, tensor<32x1024xf32>, tensor<1024x1xf32>, tensor<1024xf32>, tensor<1024x1xf32>, tensor, tensor, tensor<32x1024xf32>, tensor, tensor, tensor<1024xf32>, tensor<32x1024xf32>, tensor<1024x1xf32>, tensor<1024xf32>, tensor<1024x1xf32>, tensor, tensor, tensor<32x1024xf32>, tensor, tensor, tensor<1024xf32>, tensor<32x1024xf32>, tensor<1024x1xf32>, tensor<1024xf32>, tensor<1024x1xf32>, tensor, tensor, tensor<32x1024xf32>, tensor, tensor, tensor<1024xf32>, tensor<32x1024xf32>, tensor<1024x1xf32>, tensor<1024xf32>, tensor<1024x1xf32>, tensor) + %17 = "tf.Concat"(%cst_1, %16#5, %16#15, %16#25, %16#35) : (tensor, tensor<32x1024xf32>, tensor<32x1024xf32>, tensor<32x1024xf32>, tensor<32x1024xf32>) -> tensor<128x1024xf32> + %18 = "tf.Concat"(%cst_2, %16#1, %16#11, %16#21, %16#31) : (tensor, tensor<32x1024xf32>, tensor<32x1024xf32>, tensor<32x1024xf32>, tensor<32x1024xf32>) -> tensor<128x1024xf32> + tf_device.return %16#0, %16#9, %16#8, %16#7, %16#6, %17, %16#4, 
%16#3, %16#2, %18 : tensor, tensor, tensor<1024x1xf32>, tensor<1024xf32>, tensor<1024x1xf32>, tensor<128x1024xf32>, tensor<1024xf32>, tensor, tensor, tensor<128x1024xf32> + } + "tf.AssignVariableOp"(%arg19, %13#18) <{validate_shape = false}> : (tensor<*x!tf_type.resource>>, tensor<128x1024xf32>) -> () + "tf.AssignVariableOp"(%arg1, %13#16) <{validate_shape = false}> : (tensor<*x!tf_type.resource>>, tensor) -> () + "tf.AssignVariableOp"(%arg2, %13#14) <{validate_shape = false}> : (tensor<*x!tf_type.resource>>, tensor) -> () + "tf.AssignVariableOp"(%arg4, %13#12) <{validate_shape = false}> : (tensor<*x!tf_type.resource>>, tensor<1024xf32>) -> () + "tf.AssignVariableOp"(%arg3, %13#10) <{validate_shape = false}> : (tensor<*x!tf_type.resource>>, tensor<128x1024xf32>) -> () + "tf.AssignVariableOp"(%arg5, %13#8) <{validate_shape = false}> : (tensor<*x!tf_type.resource>>, tensor<1024x1xf32>) -> () + "tf.AssignVariableOp"(%arg20, %13#6) <{validate_shape = false}> : (tensor<*x!tf_type.resource>>, tensor<1024xf32>) -> () + "tf.AssignVariableOp"(%arg21, %13#4) <{validate_shape = false}> : (tensor<*x!tf_type.resource>>, tensor<1024x1xf32>) -> () + "tf.AssignVariableOp"(%arg6, %13#2) <{validate_shape = true}> {_has_manual_control_dependencies = true} : (tensor<*x!tf_type.resource>>, tensor) -> () + %14 = "tf.ReadVariableOp"(%arg2) {device = ""} : (tensor<*x!tf_type.resource>>) -> tensor + %15 = "tf.Identity"(%14) {device = ""} : (tensor) -> tensor + return %15 : tensor + } +} diff --git a/tensorflow/compiler/mlir/tf2xla/api/v2/tf_dialect_to_executor.cc b/tensorflow/compiler/mlir/tf2xla/api/v2/tf_dialect_to_executor.cc index c92fd85d3567b4..cd13e869e811dd 100644 --- a/tensorflow/compiler/mlir/tf2xla/api/v2/tf_dialect_to_executor.cc +++ b/tensorflow/compiler/mlir/tf2xla/api/v2/tf_dialect_to_executor.cc @@ -88,6 +88,8 @@ void AddTfDialectToExecutorPasses(OpPassManager &pm) { pm.addNestedPass(mlir::TFTPU::CreateTPUDevicePropagationPass()); pm.addNestedPass(mlir::TFTPU::CreateTPUColocateSplitsPass()); pm.addPass(mlir::createSymbolDCEPass()); + pm.addNestedPass( + mlir::tf_executor::CreateTFExecutorGraphPruningPass()); if (tensorflow::GetMlirCommonFlags() ->tf_mlir_enable_convert_control_to_data_outputs_pass) { bool composite_tpuexecute_side_effects = diff --git a/tensorflow/compiler/mlir/tf2xla/api/v2/tf_dialect_to_executor_test.cc b/tensorflow/compiler/mlir/tf2xla/api/v2/tf_dialect_to_executor_test.cc index 0c64dd3dcbe1a3..897c800d9e4cd7 100644 --- a/tensorflow/compiler/mlir/tf2xla/api/v2/tf_dialect_to_executor_test.cc +++ b/tensorflow/compiler/mlir/tf2xla/api/v2/tf_dialect_to_executor_test.cc @@ -15,12 +15,16 @@ limitations under the License. #include "tensorflow/compiler/mlir/tf2xla/api/v2/tf_dialect_to_executor.h" +#include + #include #include #include #include "absl/status/status.h" #include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" +#include "llvm/Support/raw_ostream.h" #include "mlir/IR/BuiltinOps.h" // from @llvm-project #include "mlir/IR/DialectRegistry.h" // from @llvm-project #include "mlir/IR/MLIRContext.h" // from @llvm-project @@ -30,7 +34,6 @@ limitations under the License. 
#include "tensorflow/core/lib/monitoring/cell_reader.h" #include "tensorflow/core/platform/resource_loader.h" #include "tsl/lib/core/status_test_util.h" -#include "tsl/platform/status.h" namespace tensorflow { namespace tf2xla { @@ -53,6 +56,16 @@ std::string TestDataPath() { "tensorflow/compiler/mlir/tf2xla/api/v2/testdata/"); } +size_t CountSubstring(absl::string_view str, absl::string_view substr) { + size_t count = 0; + size_t idx = str.find(substr); + while (idx != std::string::npos) { + count++; + idx = str.find(substr, idx + 1); + } + return count; +} + class TensorflowDialectToExecutorTest : public ::testing::Test { public: TensorflowDialectToExecutorTest() { @@ -100,6 +113,23 @@ TEST_F(TensorflowDialectToExecutorTest, ErrorsWhenCannotConvert) { EXPECT_EQ(compilation_status.Delta(kExportFailed), 1); } +TEST_F(TensorflowDialectToExecutorTest, PrunesDeadOps) { + CellReader compilation_status(kExportStreamzName); + + TF_ASSERT_OK(CreateMlirModule("func_with_dead_ops.mlir")); + + TF_EXPECT_OK(ExportFromTensorflowDialectToExecutor(*mlir_module_)); + + std::string module_dump; + llvm::raw_string_ostream raw_stream(module_dump); + mlir_module_->print(raw_stream); + + EXPECT_EQ(compilation_status.Delta(kExportSuccess), 1); + EXPECT_EQ(compilation_status.Delta(kExportFailed), 0); + EXPECT_EQ( + CountSubstring(module_dump, "tf_executor.island wraps \"tf.Concat\""), 2); +} + } // namespace } // namespace v2 } // namespace tf2xla From 1a59524ded5f73beb09fa78ac2a4eed8673ba6bb Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Mon, 13 May 2024 14:45:39 -0700 Subject: [PATCH 066/478] [xla] NFC: Delete deprecated flags and remains of xla runtime PiperOrigin-RevId: 633338202 --- third_party/xla/xla/debug_options_flags.cc | 36 ------------------- .../xla/xla/service/gpu/gpu_compiler.cc | 2 +- .../xla/xla/service/gpu/gpu_executable.cc | 13 ------- .../xla/xla/service/gpu/gpu_executable.h | 3 -- .../xla/service/gpu/ir_emitter_unnested.cc | 6 ---- third_party/xla/xla/xla.proto | 26 ++++---------- 6 files changed, 8 insertions(+), 78 deletions(-) diff --git a/third_party/xla/xla/debug_options_flags.cc b/third_party/xla/xla/debug_options_flags.cc index 3eb1a7a2fd4292..831a26b2c96793 100644 --- a/third_party/xla/xla/debug_options_flags.cc +++ b/third_party/xla/xla/debug_options_flags.cc @@ -78,8 +78,6 @@ DebugOptions DefaultDebugOptionsIgnoringFlags() { #ifdef XLA_CPU_USE_ACL opts.set_xla_cpu_use_acl(true); #endif - opts.set_xla_cpu_use_xla_runtime(false); - opts.set_xla_cpu_sparse_cuda_threads(0); opts.set_xla_cpu_enable_fast_math(false); // Disable forms of fast math that have caused users problems in the past. @@ -111,10 +109,8 @@ DebugOptions DefaultDebugOptionsIgnoringFlags() { opts.add_xla_gpu_enable_command_buffer(DebugOptions::CUBLAS); opts.add_xla_gpu_enable_command_buffer(DebugOptions::CUSTOM_CALL); opts.add_xla_gpu_enable_command_buffer(DebugOptions::CUDNN); - opts.set_xla_gpu_graph_num_runs_to_instantiate(-1); opts.set_xla_gpu_graph_min_graph_size(5); opts.set_xla_gpu_graph_enable_concurrent_region(false); - opts.set_xla_gpu_graph_eviction_timeout_seconds(60); // Despite the name, fast min/max on GPUs does not seem to be any faster, and // adds very counter-intuitive "NaN-swallowing" behavior. 
@@ -138,7 +134,6 @@ DebugOptions DefaultDebugOptionsIgnoringFlags() { opts.set_xla_detailed_logging(true); opts.set_xla_enable_dumping(true); - opts.set_xla_gpu_enable_xla_runtime_executable(false); opts.set_xla_gpu_enable_custom_fusions(false); opts.set_xla_gpu_enable_address_computation_fusion(true); opts.set_xla_gpu_nccl_termination_timeout_seconds(-1); @@ -766,17 +761,6 @@ void MakeDebugOptionsFlags(std::vector* flag_list, "xla_cpu_use_acl", bool_setter_for(&DebugOptions::set_xla_cpu_use_acl), debug_options->xla_cpu_use_acl(), "Generate calls to ACL (Arm Compute Library) in the CPU backend.")); - flag_list->push_back( - tsl::Flag("xla_cpu_use_xla_runtime", - bool_setter_for(&DebugOptions::set_xla_cpu_use_xla_runtime), - debug_options->xla_cpu_use_xla_runtime(), - "Enable XLA Runtime in the CPU backend.")); - flag_list->push_back(tsl::Flag( - "xla_cpu_sparse_cuda_threads", - int32_setter_for(&DebugOptions::set_xla_cpu_sparse_cuda_threads), - debug_options->xla_cpu_sparse_cuda_threads(), - "Sets number fo CUDA threads for sparse GPU acceleration in the CPU " - "backend (0 = off).")); flag_list->push_back(tsl::Flag( "xla_gpu_crash_on_verification_failures", bool_setter_for( @@ -1148,13 +1132,6 @@ void MakeDebugOptionsFlags(std::vector* flag_list, " can either be a list of command types or a list of command types with" " + and - as prefix, which indicate adding or removing a command type" " to/from the default list.")); - flag_list->push_back(tsl::Flag( - "xla_gpu_graph_num_runs_to_instantiate", - int32_setter_for( - &DebugOptions::set_xla_gpu_graph_num_runs_to_instantiate), - debug_options->xla_gpu_graph_num_runs_to_instantiate(), - "Instantiate a gpu graph after the time a captured function is executed " - "reaches the threshold.")); flag_list->push_back(tsl::Flag( "xla_gpu_graph_min_graph_size", int32_setter_for(&DebugOptions::set_xla_gpu_graph_min_graph_size), @@ -1168,14 +1145,6 @@ void MakeDebugOptionsFlags(std::vector* flag_list, debug_options->xla_gpu_graph_enable_concurrent_region(), "Identify concurrent regions in gpu graphs and execute them " "concurrently.")); - flag_list->push_back(tsl::Flag( - "xla_gpu_graph_eviction_timeout_seconds", - int32_setter_for( - &DebugOptions::set_xla_gpu_graph_eviction_timeout_seconds), - debug_options->xla_gpu_graph_eviction_timeout_seconds(), - "Timeout in seconds to evict instantiated Gpu graphs from device. When " - "XLA instantiates new Gpu graphs, it evicts graphs that were not " - "recently executed to free space on device.")); flag_list->push_back( tsl::Flag("xla_dump_disable_metadata", @@ -1197,11 +1166,6 @@ void MakeDebugOptionsFlags(std::vector* flag_list, "MLIR will be in the llvm-parsable format and can be processed by " "mlir-opt tools. 
" "Pretty print form is not legal MLIR.")); - flag_list->push_back(tsl::Flag( - "xla_gpu_enable_xla_runtime_executable", - bool_setter_for(&DebugOptions::set_xla_gpu_enable_xla_runtime_executable), - debug_options->xla_gpu_enable_xla_runtime_executable(), - "Whether to enable XLA runtime for XLA:GPU backend")); flag_list->push_back(tsl::Flag( "xla_gpu_enable_custom_fusions", bool_setter_for(&DebugOptions::set_xla_gpu_enable_custom_fusions), diff --git a/third_party/xla/xla/service/gpu/gpu_compiler.cc b/third_party/xla/xla/service/gpu/gpu_compiler.cc index 35f6bf612f7ee6..276a4f2c9929a7 100644 --- a/third_party/xla/xla/service/gpu/gpu_compiler.cc +++ b/third_party/xla/xla/service/gpu/gpu_compiler.cc @@ -2210,7 +2210,7 @@ absl::Status GpuCompiler::RunPostSchedulingPipelines( // After we have a scheduled module and all operations wrapped into fusions we // can decide how to wrap them into command buffers. - if (!IsXlaRuntimeExecutableEnabled(module->config())) { + { HloPassPipeline pipeline("command-buffer-scheduling"); auto driver_version = se::gpu::GpuDriver::GetDriverVersion(); #if GOOGLE_CUDA diff --git a/third_party/xla/xla/service/gpu/gpu_executable.cc b/third_party/xla/xla/service/gpu/gpu_executable.cc index c4c55a0aed85af..25343eb9c93512 100644 --- a/third_party/xla/xla/service/gpu/gpu_executable.cc +++ b/third_party/xla/xla/service/gpu/gpu_executable.cc @@ -109,19 +109,6 @@ namespace gpu { using ::tsl::profiler::ScopedAnnotation; -bool IsXlaRuntimeExecutableEnabled(const HloModuleConfig& config) { - bool enabled = config.debug_options().xla_gpu_enable_xla_runtime_executable(); - if (enabled) { - LOG(ERROR) - << "XLA:GPU tried to use deprecated xla runtime by setting " - "--xla_gpu_enable_xla_runtime_executable flag to `true` but the " - "flag value was ignored as XLA:GPU uses default runtime. This flag " - "together with the deprecated code will be removed soon. Please " - "report bugs to XLA team if this breaks your workloads."; - } - return false; -} - static bool NeedsAsyncCommsStream(Thunk& thunk) { switch (thunk.kind()) { case Thunk::Kind::kNcclAllReduceStart: diff --git a/third_party/xla/xla/service/gpu/gpu_executable.h b/third_party/xla/xla/service/gpu/gpu_executable.h index ae481f1b45519d..7b5bfd7dcc2a8c 100644 --- a/third_party/xla/xla/service/gpu/gpu_executable.h +++ b/third_party/xla/xla/service/gpu/gpu_executable.h @@ -56,9 +56,6 @@ limitations under the License. namespace xla { namespace gpu { -// Returns whether GpuExecutable runs with Xla Runtime. -bool IsXlaRuntimeExecutableEnabled(const HloModuleConfig& config); - // GPU-targeting implementation of the XLA Executable interface. // // Launches the given GPU kernel via the StreamExecutor. diff --git a/third_party/xla/xla/service/gpu/ir_emitter_unnested.cc b/third_party/xla/xla/service/gpu/ir_emitter_unnested.cc index 9ba1be41febde8..308a7c479822f5 100644 --- a/third_party/xla/xla/service/gpu/ir_emitter_unnested.cc +++ b/third_party/xla/xla/service/gpu/ir_emitter_unnested.cc @@ -1307,12 +1307,6 @@ absl::Status IrEmitterUnnested::EmitCustomCallThunk( return absl::OkStatus(); } - // TODO(ezhulenev): Custom calls registered with an XLA runtime are not part - // of a legacy registry, or an FFI registry. For now we simply ignore them. 
- if (debug_options.xla_gpu_enable_xla_runtime_executable()) { - return absl::OkStatus(); - } - return absl::UnimplementedError( absl::StrCat("No registered implementation for custom call to ", call_target_name, " for platform ", platform_name())); diff --git a/third_party/xla/xla/xla.proto b/third_party/xla/xla/xla.proto index c49d4a509a2618..b5044afe4d6292 100644 --- a/third_party/xla/xla/xla.proto +++ b/third_party/xla/xla/xla.proto @@ -114,8 +114,7 @@ message DebugOptions { // Generate calls to MKL-DNN in the CPU backend. bool xla_cpu_use_mkl_dnn = 97; - // Enable XLA Runtime in the CPU backend. - bool xla_cpu_use_xla_runtime = 177; + reserved 177; // Was xla_cpu_use_xla_runtime reserved 98; // Was xla_gpu_max_kernel_unroll_factor @@ -165,12 +164,7 @@ message DebugOptions { // above! bool xla_gpu_enable_fast_min_max = 100; - // Defines the number of CUDA threads that can be used to accelerate - // a sparse computation compiled for the XLA Runtime and CPU backend. - // By default (value 0), no acceleration is used. Otherwise, this - // many threads may be used to accelerate sparse operations, typically - // useful when accelerating structured sparsity. - int32 xla_cpu_sparse_cuda_threads = 207; + reserved 207; // Was xla_cpu_sparse_cuda_threads // Allows xla to increase the output precision of floating point operations // and all floating-point conversions to be simplified, including those @@ -454,8 +448,7 @@ message DebugOptions { reserved 161; // Was xla_gpu_bef_executable reserved 162; // Was xla_gpu_bef_thunk - // If true, use XLA runtime for XLA:GPU backend. - bool xla_gpu_enable_xla_runtime_executable = 169; + reserved 169; // Was xla_gpu_enable_xla_runtime_executable // If true, XLA will try to pattern match subgraphs of HLO operations into // custom fusions registered in the current process (pre-compiled hand written @@ -466,7 +459,8 @@ message DebugOptions { // if `xla_gpu_enable_custom_fusion` set to true. string xla_gpu_enable_custom_fusions_re = 264; - // If true, use XLA runtime for XLA:GPU backend. + // Enables address computation fusion to optimize dynamic-slice and + // dynamic-update-slice operations around library calls. bool xla_gpu_enable_address_computation_fusion = 105; reserved 233; // was xla_gpu_enable_gpu2_runtime @@ -500,10 +494,7 @@ message DebugOptions { // Determine the types of commands that are recorded into command buffers. repeated CommandBufferCmdType xla_gpu_enable_command_buffer = 258; - // Only instantiates a GPU graph after the captured function execution count - // reaches the threshold. This constant is a heuristic to avoid creating a - // large number of CUDA graph instances in memory. - int32 xla_gpu_graph_num_runs_to_instantiate = 202; + reserved 202; // Was xla_gpu_graph_num_runs_to_instantiate // This number determines how many moved instructions like fusion kernels are // required for a region to be captured as a function to be launched as a GPU @@ -513,10 +504,7 @@ message DebugOptions { // Identify concurrent regions in GPU graphs and execute them concurrently. bool xla_gpu_graph_enable_concurrent_region = 215; - // Timeout in seconds to evict instantiated Gpu graphs from device. When XLA - // instantiates new Gpu graphs, it evicts graphs that were not recently - // executed to free space on device. - int32 xla_gpu_graph_eviction_timeout_seconds = 230; + reserved 230; // Was xla_gpu_graph_eviction_timeout_seconds // Size threshold (in megabytes) for the GPU redzone scratch allocator. 
int64 xla_gpu_redzone_scratch_max_megabytes = 167; From 09ce711fffb3c4268af4a1b55d39e1d37eb6e822 Mon Sep 17 00:00:00 2001 From: Fergus Henderson Date: Mon, 13 May 2024 14:46:48 -0700 Subject: [PATCH 067/478] Remove EmitErrorReporter class, since it isn't used. PiperOrigin-RevId: 633338523 --- tensorflow/compiler/mlir/lite/BUILD | 15 ------- .../compiler/mlir/lite/emit_error_reporter.cc | 30 -------------- .../compiler/mlir/lite/emit_error_reporter.h | 39 ------------------- 3 files changed, 84 deletions(-) delete mode 100644 tensorflow/compiler/mlir/lite/emit_error_reporter.cc delete mode 100644 tensorflow/compiler/mlir/lite/emit_error_reporter.h diff --git a/tensorflow/compiler/mlir/lite/BUILD b/tensorflow/compiler/mlir/lite/BUILD index 72970c2d07d4b5..2cfbecb6cbbc89 100644 --- a/tensorflow/compiler/mlir/lite/BUILD +++ b/tensorflow/compiler/mlir/lite/BUILD @@ -1070,21 +1070,6 @@ tf_native_cc_binary( ], ) -cc_library( - name = "emit_error_reporter", - srcs = [ - "emit_error_reporter.cc", - ], - hdrs = [ - "emit_error_reporter.h", - ], - deps = [ - "//tensorflow/lite/core/api", - "@llvm-project//mlir:FuncDialect", - "@llvm-project//mlir:IR", - ], -) - cc_library( name = "flatbuffer_export", srcs = [ diff --git a/tensorflow/compiler/mlir/lite/emit_error_reporter.cc b/tensorflow/compiler/mlir/lite/emit_error_reporter.cc deleted file mode 100644 index f9c4760326b5d6..00000000000000 --- a/tensorflow/compiler/mlir/lite/emit_error_reporter.cc +++ /dev/null @@ -1,30 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/compiler/mlir/lite/emit_error_reporter.h" - -#include -#include - -namespace tflite { - -int EmitErrorReporter::Report(const char* format, va_list args) { - std::vector buf(1 + snprintf(nullptr, 0, format, args)); - std::vsnprintf(buf.data(), buf.size(), format, args); - module_.emitError() << std::string(buf.begin(), buf.end()); - return 0; -} - -} // namespace tflite diff --git a/tensorflow/compiler/mlir/lite/emit_error_reporter.h b/tensorflow/compiler/mlir/lite/emit_error_reporter.h deleted file mode 100644 index 9e9a5925600fc2..00000000000000 --- a/tensorflow/compiler/mlir/lite/emit_error_reporter.h +++ /dev/null @@ -1,39 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#ifndef TENSORFLOW_COMPILER_MLIR_LITE_EMIT_ERROR_REPORTER_H_ -#define TENSORFLOW_COMPILER_MLIR_LITE_EMIT_ERROR_REPORTER_H_ - -#include - -#include "mlir/Dialect/Func/IR/FuncOps.h" // from @llvm-project -#include "mlir/IR/BuiltinOps.h" // from @llvm-project -#include "tensorflow/lite/core/api/error_reporter.h" - -namespace tflite { - -// Error reporter that reports errors via the module's emitError. -class EmitErrorReporter : public ErrorReporter { - public: - explicit EmitErrorReporter(mlir::ModuleOp module) : module_(module) {} - int Report(const char* format, va_list args) override; - - private: - mlir::ModuleOp module_; -}; - -} // namespace tflite - -#endif // TENSORFLOW_COMPILER_MLIR_LITE_EMIT_ERROR_REPORTER_H_ From 7c51020c4efc2ec0ab18d9d9c3c8a98408c46e08 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 May 2024 14:59:52 -0700 Subject: [PATCH 068/478] Move #include outside of ifdef. PiperOrigin-RevId: 633342149 --- .../internal/optimized/integer_ops/depthwise_conv_3x3_filter.h | 3 ++- .../optimized/integer_ops/depthwise_conv_hybrid_3x3_filter.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/kernels/internal/optimized/integer_ops/depthwise_conv_3x3_filter.h b/tensorflow/lite/kernels/internal/optimized/integer_ops/depthwise_conv_3x3_filter.h index 0cb1a23e5567b9..8d761dd3dc3e10 100644 --- a/tensorflow/lite/kernels/internal/optimized/integer_ops/depthwise_conv_3x3_filter.h +++ b/tensorflow/lite/kernels/internal/optimized/integer_ops/depthwise_conv_3x3_filter.h @@ -15,6 +15,8 @@ limitations under the License. #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_INTEGER_OPS_DEPTHWISE_CONV_3X3_FILTER_H_ #define TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_INTEGER_OPS_DEPTHWISE_CONV_3X3_FILTER_H_ +#include + #include #include "ruy/profiler/instrumentation.h" // from @ruy @@ -32,7 +34,6 @@ namespace depthwise_conv { // Enable for arm64 except for the Nvidia Linux 4 Tegra (L4T) running on // Jetson TX-2. This compiler does not support the offsetof() macro. #if defined(__aarch64__) && !defined(GOOGLE_L4T) -#include // Represents the number of bytes offset from the start of the // DepthwiseConvParams struct. This is used in the asm to load parameters. diff --git a/tensorflow/lite/kernels/internal/optimized/integer_ops/depthwise_conv_hybrid_3x3_filter.h b/tensorflow/lite/kernels/internal/optimized/integer_ops/depthwise_conv_hybrid_3x3_filter.h index f9472515417e85..5c7abda84fcfa0 100644 --- a/tensorflow/lite/kernels/internal/optimized/integer_ops/depthwise_conv_hybrid_3x3_filter.h +++ b/tensorflow/lite/kernels/internal/optimized/integer_ops/depthwise_conv_hybrid_3x3_filter.h @@ -15,6 +15,8 @@ limitations under the License. #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_INTEGER_OPS_DEPTHWISE_CONV_HYBRID_3X3_FILTER_H_ #define TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_INTEGER_OPS_DEPTHWISE_CONV_HYBRID_3X3_FILTER_H_ +#include + #include #include "ruy/profiler/instrumentation.h" // from @ruy @@ -32,7 +34,6 @@ namespace depthwise_conv { // Enable for arm64 except for the Nvidia Linux 4 Tegra (L4T) running on // Jetson TX-2. This compiler does not support the offsetof() macro. #if defined(__aarch64__) && !defined(GOOGLE_L4T) -#include // Represents the number of bytes offset from the start of the // DepthwiseConvParams struct. This is used in the asm to load parameters. From 0025bdd212d4c153ea87ce40cd098e094f3f944f Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Mon, 13 May 2024 15:25:03 -0700 Subject: [PATCH 069/478] Update ops-related pbtxt files. PiperOrigin-RevId: 633349564 --- tensorflow/core/ops/ops.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index c8c55e08ca0d32..fb2f485604495b 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -1,4 +1,4 @@ -go/debugonly +go/nodeserialize op { name: "Abort" attr { From 1144753a8fb45c6dda8fec7c085bf307b6903aef Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 13 May 2024 15:41:41 -0700 Subject: [PATCH 070/478] Integrate LLVM at llvm/llvm-project@a6d7828f4c50 Updates LLVM usage to match [a6d7828f4c50](https://github.com/llvm/llvm-project/commit/a6d7828f4c50) PiperOrigin-RevId: 633354390 --- third_party/llvm/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/third_party/llvm/workspace.bzl b/third_party/llvm/workspace.bzl index d795d29b2a9434..243cc54703c4ec 100644 --- a/third_party/llvm/workspace.bzl +++ b/third_party/llvm/workspace.bzl @@ -4,8 +4,8 @@ load("//third_party:repo.bzl", "tf_http_archive") def repo(name): """Imports LLVM.""" - LLVM_COMMIT = "1066eb55477044a3a92f3a40471375194dfcdbc8" - LLVM_SHA256 = "dc0fc82c184cb9ccd094f5ff821914d610d79529e82f0cbcb91ddedebff29a0a" + LLVM_COMMIT = "a6d7828f4c50c1ec7b0b5f61fe59d7a768175dcc" + LLVM_SHA256 = "c09ec3020fb6b136064ff32f53ac07067b0f12ccbf016ac69965e4e38d61a9c0" tf_http_archive( name = name, From 6b9a25f438944132acc54ecf2c59af53c10fd6a1 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Mon, 13 May 2024 15:48:45 -0700 Subject: [PATCH 071/478] [IFRT] Rename XlaProgram to HloProgram, move it from PJRT-IFRT to IFRT. HLO programs represented in MLIR are independent of PJRT or XLA as a compiler, so they don't belong in PJRT-IFRT. Also rename the Python binding API from get_xla_program to get_hlo_program, but this API is not used yet. Cleanup; no functional changes intended. 
PiperOrigin-RevId: 633356170 --- tensorflow/core/tfrt/ifrt/BUILD | 1 + .../core/tfrt/ifrt/ifrt_serving_executable.cc | 3 +- third_party/xla/xla/python/BUILD | 1 + third_party/xla/xla/python/ifrt/hlo/BUILD | 62 +++++++++++++++++++ .../xla/xla/python/ifrt/hlo/hlo_program.cc | 22 +++++++ .../xla/xla/python/ifrt/hlo/hlo_program.h | 54 ++++++++++++++++ .../hlo/hlo_program_serdes.cc} | 20 +++--- .../hlo/hlo_program_serdes_test.cc} | 10 +-- .../xla/xla/python/ifrt/ir/tests/BUILD | 1 + .../ifrt/ir/tests/executable_impl_test_lib.cc | 3 +- .../xla/xla/python/ifrt_proxy/common/BUILD | 2 +- third_party/xla/xla/python/pjrt_ifrt/BUILD | 41 +----------- .../xla/xla/python/pjrt_ifrt/pjrt_compiler.cc | 5 +- .../xla/xla/python/pjrt_ifrt/xla_compiler.cc | 1 - .../xla/xla/python/pjrt_ifrt/xla_compiler.h | 18 ------ .../pjrt_ifrt/xla_executable_impl_test_lib.cc | 3 +- third_party/xla/xla/python/py_client.cc | 3 +- third_party/xla/xla/python/py_program.cc | 21 ++++--- third_party/xla/xla/python/xla_client_test.py | 6 +- .../python/xla_extension/ifrt_programs.pyi | 2 +- 20 files changed, 186 insertions(+), 93 deletions(-) create mode 100644 third_party/xla/xla/python/ifrt/hlo/BUILD create mode 100644 third_party/xla/xla/python/ifrt/hlo/hlo_program.cc create mode 100644 third_party/xla/xla/python/ifrt/hlo/hlo_program.h rename third_party/xla/xla/python/{pjrt_ifrt/xla_program_serdes.cc => ifrt/hlo/hlo_program_serdes.cc} (85%) rename third_party/xla/xla/python/{pjrt_ifrt/xla_program_serdes_test.cc => ifrt/hlo/hlo_program_serdes_test.cc} (91%) diff --git a/tensorflow/core/tfrt/ifrt/BUILD b/tensorflow/core/tfrt/ifrt/BUILD index 369fecac053dbe..e0a3b4ed4553bc 100644 --- a/tensorflow/core/tfrt/ifrt/BUILD +++ b/tensorflow/core/tfrt/ifrt/BUILD @@ -95,6 +95,7 @@ cc_library( "@local_xla//xla/pjrt:host_callback", "@local_xla//xla/pjrt:pjrt_executable", "@local_xla//xla/python/ifrt", + "@local_xla//xla/python/ifrt/hlo:hlo_program", "@local_xla//xla/python/pjrt_ifrt", "@local_xla//xla/python/pjrt_ifrt:xla_ifrt", "@local_xla//xla/service:computation_placer_hdr", diff --git a/tensorflow/core/tfrt/ifrt/ifrt_serving_executable.cc b/tensorflow/core/tfrt/ifrt/ifrt_serving_executable.cc index c85c9a9d223b08..b65b9c68eb1f78 100644 --- a/tensorflow/core/tfrt/ifrt/ifrt_serving_executable.cc +++ b/tensorflow/core/tfrt/ifrt/ifrt_serving_executable.cc @@ -51,6 +51,7 @@ limitations under the License. 
#include "xla/python/ifrt/device.h" #include "xla/python/ifrt/executable.h" #include "xla/python/ifrt/future.h" +#include "xla/python/ifrt/hlo/hlo_program.h" #include "xla/python/ifrt/host_callback.h" #include "xla/python/ifrt/shape.h" #include "xla/python/ifrt/sharding.h" @@ -386,7 +387,7 @@ IfrtServingExecutable::CreateExecutableSynchronously( TF_ASSIGN_OR_RETURN( std::unique_ptr ifrt_executable, ifrt_client_->GetDefaultCompiler()->Compile( - std::make_unique( + std::make_unique( tf2hlo_result.mlir_hlo_module.get()), std::make_unique( xla_compile_options, loaded_host_callbacks))); diff --git a/third_party/xla/xla/python/BUILD b/third_party/xla/xla/python/BUILD index c1a36af8359a20..ec8631655c1243 100644 --- a/third_party/xla/xla/python/BUILD +++ b/third_party/xla/xla/python/BUILD @@ -389,6 +389,7 @@ cc_library( "//xla/python/ifrt", "//xla/python/ifrt:plugin_program", "//xla/python/ifrt:plugin_program_serdes", + "//xla/python/ifrt/hlo:hlo_program", "//xla/python/pjrt_ifrt", "//xla/python/pjrt_ifrt:xla_host_callback_proto_cc", "//xla/python/pjrt_ifrt:xla_ifrt", diff --git a/third_party/xla/xla/python/ifrt/hlo/BUILD b/third_party/xla/xla/python/ifrt/hlo/BUILD new file mode 100644 index 00000000000000..9925bf37459263 --- /dev/null +++ b/third_party/xla/xla/python/ifrt/hlo/BUILD @@ -0,0 +1,62 @@ +load("//xla:xla.bzl", "xla_cc_test") +load("//xla/tsl:tsl.bzl", "internal_visibility") +load("//xla/tsl:tsl.default.bzl", "get_compatible_with_portable") + +package( + # copybara:uncomment default_applicable_licenses = ["//tensorflow:license"], + default_visibility = internal_visibility([ + "//xla/python/ifrt:friends", + "//xla/python/ifrt:internal", + ]), +) + +cc_library( + name = "hlo_program", + srcs = ["hlo_program.cc"], + hdrs = ["hlo_program.h"], + compatible_with = get_compatible_with_portable(), + deps = [ + "//xla/python/ifrt", + "@llvm-project//llvm:Support", + "@llvm-project//mlir:IR", + ], +) + +cc_library( + name = "hlo_program_serdes", + srcs = ["hlo_program_serdes.cc"], + compatible_with = get_compatible_with_portable(), + deps = [ + ":hlo_program", + "//xla/mlir_hlo:mhlo_passes", + "//xla/pjrt:mlir_to_hlo", + "//xla/python/ifrt:serdes", + "@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/strings", + "@llvm-project//llvm:Support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:Support", + "@local_tsl//tsl/platform:status", + "@stablehlo//:stablehlo_portable_api", + "@stablehlo//:stablehlo_serialization", + ], + alwayslink = True, +) + +xla_cc_test( + name = "hlo_program_serdes_test", + srcs = ["hlo_program_serdes_test.cc"], + deps = [ + ":hlo_program", + ":hlo_program_serdes", + "//xla/mlir_hlo", + "//xla/pjrt:mlir_to_hlo", + "//xla/python/ifrt:serdes", + "@com_google_googletest//:gtest_main", + "@llvm-project//llvm:Support", + "@llvm-project//mlir:FuncDialect", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Support", + ], +) diff --git a/third_party/xla/xla/python/ifrt/hlo/hlo_program.cc b/third_party/xla/xla/python/ifrt/hlo/hlo_program.cc new file mode 100644 index 00000000000000..3e79c3d4ec6079 --- /dev/null +++ b/third_party/xla/xla/python/ifrt/hlo/hlo_program.cc @@ -0,0 +1,22 @@ +/* Copyright 2023 The OpenXLA Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "xla/python/ifrt/hlo/hlo_program.h"
+
+namespace xla::ifrt {
+
+char HloProgram::ID = 0;
+
+}  // namespace xla::ifrt
diff --git a/third_party/xla/xla/python/ifrt/hlo/hlo_program.h b/third_party/xla/xla/python/ifrt/hlo/hlo_program.h
new file mode 100644
index 00000000000000..79f291cb062c3b
--- /dev/null
+++ b/third_party/xla/xla/python/ifrt/hlo/hlo_program.h
@@ -0,0 +1,54 @@
+/* Copyright 2023 The OpenXLA Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef XLA_PYTHON_IFRT_HLO_HLO_PROGRAM_H_
+#define XLA_PYTHON_IFRT_HLO_HLO_PROGRAM_H_
+
+#include
+#include
+#include
+#include
+
+#include "llvm/Support/ExtensibleRTTI.h"
+#include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/MLIRContext.h"  // from @llvm-project
+#include "mlir/IR/OwningOpRef.h"  // from @llvm-project
+#include "xla/python/ifrt/program.h"
+
+namespace xla {
+namespace ifrt {
+
+struct HloProgram : llvm::RTTIExtends<HloProgram, Program> {
+  HloProgram() = default;
+  explicit HloProgram(mlir::ModuleOp module) : mlir_module(module) {}
+  HloProgram(std::unique_ptr<mlir::MLIRContext> context,
+             mlir::OwningOpRef<mlir::ModuleOp> module)
+      : mlir_module(*module),
+        mlir_context(std::move(context)),
+        owning_mlir_module(std::move(module)) {}
+
+  mlir::ModuleOp mlir_module;
+
+  static char ID;  // NOLINT
+
+ private:
+  std::unique_ptr<mlir::MLIRContext> mlir_context;
+  mlir::OwningOpRef<mlir::ModuleOp> owning_mlir_module;
+};
+
+}  // namespace ifrt
+}  // namespace xla
+
+#endif  // XLA_PYTHON_IFRT_HLO_HLO_PROGRAM_H_
diff --git a/third_party/xla/xla/python/pjrt_ifrt/xla_program_serdes.cc b/third_party/xla/xla/python/ifrt/hlo/hlo_program_serdes.cc
similarity index 85%
rename from third_party/xla/xla/python/pjrt_ifrt/xla_program_serdes.cc
rename to third_party/xla/xla/python/ifrt/hlo/hlo_program_serdes.cc
index 83348b7b8b6e27..1360bfa0df5875 100644
--- a/third_party/xla/xla/python/pjrt_ifrt/xla_program_serdes.cc
+++ b/third_party/xla/xla/python/ifrt/hlo/hlo_program_serdes.cc
@@ -29,8 +29,8 @@ limitations under the License.
 #include "stablehlo/dialect/Serialization.h"  // from @stablehlo
 #include "xla/mlir_hlo/mhlo/transforms/passes.h"
 #include "xla/pjrt/mlir_to_hlo.h"
+#include "xla/python/ifrt/hlo/hlo_program.h"
 #include "xla/python/ifrt/serdes.h"
-#include "xla/python/pjrt_ifrt/xla_compiler.h"
 #include "tsl/platform/status.h"
 
 namespace xla {
@@ -39,7 +39,7 @@ namespace ifrt {
 namespace {
 
 // Library that provides stable serialization and deserialization of
-// `xla::ifrt::XlaProgram`.
-// `xla::ifrt::XlaProgram`. Both serialization and deserialization require
+// `xla::ifrt::HloProgram`. Both serialization and deserialization require
 // linking in this library.
 //
 // Serialization:
 // ```
 //
@@ -50,21 +50,23 @@ namespace {
 // Deserialization:
 // ```
 // TF_ASSIGN_OR_RETURN(auto deserialized, Deserialize(serialized));
-// auto xla_program = llvm::dyn_cast<XlaProgram>(deserialized);
+// auto xla_program = llvm::dyn_cast<HloProgram>(deserialized);
 // ```
 
-class XlaProgramSerDes : public llvm::RTTIExtends<XlaProgramSerDes, SerDes> {
+class HloProgramSerDes : public llvm::RTTIExtends<HloProgramSerDes, SerDes> {
  public:
   absl::string_view type_name() const override {
+    // TODO(phawkins): whenever we next break compatibility, change this to
+    // "xla::ifrt::HloProgram".
     return "xla::ifrt::XlaProgram";
   }
 
   absl::StatusOr<std::string> Serialize(Serializable& serializable) override {
-    // Currently, PjRT-IFRT accepts an `XlaProgram` that contains CHLO/MHLO.
+    // Currently, PjRT-IFRT accepts an `HloProgram` that contains CHLO/MHLO.
     // Since these dialects don't provide version compatibility, the following
     // converts the module into StableHLO and uses its portable serialization.
-    const auto& program = llvm::cast<XlaProgram>(serializable);
+    const auto& program = llvm::cast<HloProgram>(serializable);
     if (program.mlir_module == nullptr) {
       return absl::InvalidArgumentError("Unable to serialize null MLIR module");
     }
@@ -97,17 +99,17 @@ class XlaProgramSerDes : public llvm::RTTIExtends<XlaProgramSerDes, SerDes> {
       return absl::InvalidArgumentError("StableHLO => MHLO failed");
     }
 
-    return std::make_unique<XlaProgram>(std::move(context), std::move(module));
+    return std::make_unique<HloProgram>(std::move(context), std::move(module));
   }
 
   static char ID;  // NOLINT
 };
 
-char XlaProgramSerDes::ID = 0;  // NOLINT
+char HloProgramSerDes::ID = 0;  // NOLINT
 
 // clang-format off
 bool register_xla_program_serdes = ([]() {
-  RegisterSerDes<XlaProgram>(std::make_unique<XlaProgramSerDes>());
+  RegisterSerDes<HloProgram>(std::make_unique<HloProgramSerDes>());
 }(), true);
 // clang-format on
diff --git a/third_party/xla/xla/python/pjrt_ifrt/xla_program_serdes_test.cc b/third_party/xla/xla/python/ifrt/hlo/hlo_program_serdes_test.cc
similarity index 91%
rename from third_party/xla/xla/python/pjrt_ifrt/xla_program_serdes_test.cc
rename to third_party/xla/xla/python/ifrt/hlo/hlo_program_serdes_test.cc
index 41ce1c6ab67a3d..2febe7ddf31896 100644
--- a/third_party/xla/xla/python/pjrt_ifrt/xla_program_serdes_test.cc
+++ b/third_party/xla/xla/python/ifrt/hlo/hlo_program_serdes_test.cc
@@ -27,8 +27,8 @@ limitations under the License.
 #include "mlir/Support/DebugStringHelper.h"  // from @llvm-project
 #include "xla/mlir_hlo/mhlo/IR/hlo_ops.h"
 #include "xla/pjrt/mlir_to_hlo.h"
+#include "xla/python/ifrt/hlo/hlo_program.h"
 #include "xla/python/ifrt/serdes.h"
-#include "xla/python/pjrt_ifrt/xla_compiler.h"
 
 namespace xla {
 namespace ifrt {
@@ -37,7 +37,7 @@ namespace {
 using ::testing::IsNull;
 using ::testing::Not;
 
-TEST(XlaProgramSerDesTest, RoundTrip) {
+TEST(HloProgramSerDesTest, RoundTrip) {
   static constexpr absl::string_view kMlirModuleStr = R"(
 module {
   func.func @main(%arg0: tensor<2x3xf32>) -> tensor<2x3xf32> {
@@ -56,13 +56,13 @@ module {
       mlir::OwningOpRef<mlir::ModuleOp> module,
       xla::ParseMlirModuleString(kMlirModuleStr, *context));
   auto program =
-      std::make_unique<XlaProgram>(std::move(context), std::move(module));
+      std::make_unique<HloProgram>(std::move(context), std::move(module));
   TF_ASSERT_OK_AND_ASSIGN(serialized, Serialize(*program));
 }
 
 TF_ASSERT_OK_AND_ASSIGN(
-    std::unique_ptr<XlaProgram> xla_program,
-    Deserialize<XlaProgram>(serialized, /*options=*/nullptr));
+    std::unique_ptr<HloProgram> xla_program,
+    Deserialize<HloProgram>(serialized, /*options=*/nullptr));
 
   // Verify that the deserialized program has no StableHLO ops.
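   // (Serialize() stores the module as portable StableHLO bytes and
   // Deserialize() converts them back to MHLO, so a successful round trip
   // should leave no stablehlo-dialect ops behind.)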
bool has_unsupported_dialect = false; diff --git a/third_party/xla/xla/python/ifrt/ir/tests/BUILD b/third_party/xla/xla/python/ifrt/ir/tests/BUILD index 6b173ba429608c..65fb16d0a7813a 100644 --- a/third_party/xla/xla/python/ifrt/ir/tests/BUILD +++ b/third_party/xla/xla/python/ifrt/ir/tests/BUILD @@ -86,6 +86,7 @@ cc_library( "//xla/pjrt:pjrt_executable", "//xla/python/ifrt", "//xla/python/ifrt:test_util", + "//xla/python/ifrt/hlo:hlo_program", "//xla/python/ifrt/ir:compiler", "//xla/python/ifrt/ir:sharding_param", "//xla/python/pjrt_ifrt:xla_ifrt", diff --git a/third_party/xla/xla/python/ifrt/ir/tests/executable_impl_test_lib.cc b/third_party/xla/xla/python/ifrt/ir/tests/executable_impl_test_lib.cc index 25d0c05f3fcb27..8cbb44f908f0e2 100644 --- a/third_party/xla/xla/python/ifrt/ir/tests/executable_impl_test_lib.cc +++ b/third_party/xla/xla/python/ifrt/ir/tests/executable_impl_test_lib.cc @@ -28,6 +28,7 @@ limitations under the License. #include "xla/python/ifrt/device.h" #include "xla/python/ifrt/dtype.h" #include "xla/python/ifrt/executable.h" +#include "xla/python/ifrt/hlo/hlo_program.h" #include "xla/python/ifrt/ir/compiler.h" #include "xla/python/ifrt/ir/sharding_param.h" #include "xla/python/ifrt/ir/tests/executable_impl_test_base.h" @@ -304,7 +305,7 @@ module { TF_ASSERT_OK_AND_ASSIGN( std::unique_ptr child_exec, client_->GetDefaultCompiler()->Compile( - std::make_unique(*mhlo_module), + std::make_unique(*mhlo_module), std::make_unique(std::move(xla_options)))); std::string source = R"( diff --git a/third_party/xla/xla/python/ifrt_proxy/common/BUILD b/third_party/xla/xla/python/ifrt_proxy/common/BUILD index 8bca5749c2aa98..30e93ada9ccfda 100644 --- a/third_party/xla/xla/python/ifrt_proxy/common/BUILD +++ b/third_party/xla/xla/python/ifrt_proxy/common/BUILD @@ -160,7 +160,7 @@ cc_library( name = "common_serdes", deps = [ "//xla/python/ifrt:plugin_program_serdes", - "//xla/python/pjrt_ifrt:xla_program_serdes", + "//xla/python/ifrt/hlo:hlo_program_serdes", ], alwayslink = True, ) diff --git a/third_party/xla/xla/python/pjrt_ifrt/BUILD b/third_party/xla/xla/python/pjrt_ifrt/BUILD index fc841a77512c7b..2ce7d49e13e4df 100644 --- a/third_party/xla/xla/python/pjrt_ifrt/BUILD +++ b/third_party/xla/xla/python/pjrt_ifrt/BUILD @@ -77,45 +77,6 @@ tf_proto_library( protodeps = ["//xla/pjrt:compile_options_proto"], ) -cc_library( - name = "xla_program_serdes", - srcs = ["xla_program_serdes.cc"], - compatible_with = get_compatible_with_portable(), - deps = [ - ":xla_ifrt", - "//xla/mlir_hlo:mhlo_passes", - "//xla/pjrt:mlir_to_hlo", - "//xla/python/ifrt:serdes", - "@com_google_absl//absl/status:statusor", - "@com_google_absl//absl/strings", - "@llvm-project//llvm:Support", - "@llvm-project//mlir:IR", - "@llvm-project//mlir:Pass", - "@llvm-project//mlir:Support", - "@local_tsl//tsl/platform:status", - "@stablehlo//:stablehlo_portable_api", - "@stablehlo//:stablehlo_serialization", - ], - alwayslink = True, -) - -xla_cc_test( - name = "xla_program_serdes_test", - srcs = ["xla_program_serdes_test.cc"], - deps = [ - ":xla_ifrt", - ":xla_program_serdes", - "//xla/mlir_hlo", - "//xla/pjrt:mlir_to_hlo", - "//xla/python/ifrt:serdes", - "@com_google_googletest//:gtest_main", - "@llvm-project//llvm:Support", - "@llvm-project//mlir:FuncDialect", - "@llvm-project//mlir:IR", - "@llvm-project//mlir:Support", - ], -) - tf_proto_library( name = "xla_sharding_proto", srcs = ["xla_sharding.proto"], @@ -174,6 +135,7 @@ cc_library( "//xla/pjrt:pjrt_executable", "//xla/python/ifrt", "//xla/python/ifrt:test_util", + 
"//xla/python/ifrt/hlo:hlo_program", "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:span", @@ -259,6 +221,7 @@ cc_library( "//xla/pjrt:pjrt_layout", "//xla/pjrt:utils", "//xla/python/ifrt", + "//xla/python/ifrt/hlo:hlo_program", "//xla/service:hlo_proto_cc", "//xla/translate/mhlo_to_hlo:type_to_shape", "//xla/tsl/concurrency:ref_count", diff --git a/third_party/xla/xla/python/pjrt_ifrt/pjrt_compiler.cc b/third_party/xla/xla/python/pjrt_ifrt/pjrt_compiler.cc index bb7aaad5e02df0..7245857e70cdb5 100644 --- a/third_party/xla/xla/python/pjrt_ifrt/pjrt_compiler.cc +++ b/third_party/xla/xla/python/pjrt_ifrt/pjrt_compiler.cc @@ -21,6 +21,7 @@ limitations under the License. #include "absl/status/status.h" #include "llvm/Support/Casting.h" +#include "xla/python/ifrt/hlo/hlo_program.h" #include "xla/python/pjrt_ifrt/pjrt_client.h" #include "xla/python/pjrt_ifrt/pjrt_executable.h" #include "xla/python/pjrt_ifrt/xla_compiler.h" @@ -34,9 +35,9 @@ char PjRtCompiler::ID = 0; absl::StatusOr> PjRtCompiler::Compile( std::unique_ptr program, std::unique_ptr options) { DCHECK(this); - const auto* xla_program = llvm::dyn_cast(program.get()); + const auto* xla_program = llvm::dyn_cast(program.get()); if (xla_program == nullptr) { - return absl::InvalidArgumentError("PjRtCompiler requires an XlaProgram"); + return absl::InvalidArgumentError("PjRtCompiler requires an HloProgram"); } TF_ASSIGN_OR_RETURN(auto xla_compile_options, GetXlaCompileOptions(std::move(options))); diff --git a/third_party/xla/xla/python/pjrt_ifrt/xla_compiler.cc b/third_party/xla/xla/python/pjrt_ifrt/xla_compiler.cc index 165742ad28904c..279fb27ad9d864 100644 --- a/third_party/xla/xla/python/pjrt_ifrt/xla_compiler.cc +++ b/third_party/xla/xla/python/pjrt_ifrt/xla_compiler.cc @@ -82,7 +82,6 @@ bool register_xla_compile_options_serdes = ([]{ } // namespace -char XlaProgram::ID = 0; char XlaCompileOptions::ID = 0; char XlaDeserializeExecutableOptions::ID = 0; diff --git a/third_party/xla/xla/python/pjrt_ifrt/xla_compiler.h b/third_party/xla/xla/python/pjrt_ifrt/xla_compiler.h index dc670d52418a6d..ad6a8442249866 100644 --- a/third_party/xla/xla/python/pjrt_ifrt/xla_compiler.h +++ b/third_party/xla/xla/python/pjrt_ifrt/xla_compiler.h @@ -31,24 +31,6 @@ limitations under the License. namespace xla { namespace ifrt { -struct XlaProgram : llvm::RTTIExtends { - XlaProgram() = default; - explicit XlaProgram(mlir::ModuleOp module) : mlir_module(module) {} - XlaProgram(std::unique_ptr context, - mlir::OwningOpRef module) - : mlir_module(*module), - mlir_context(std::move(context)), - owning_mlir_module(std::move(module)) {} - - mlir::ModuleOp mlir_module; - - static char ID; // NOLINT - - private: - std::unique_ptr mlir_context; - mlir::OwningOpRef owning_mlir_module; -}; - // Wraps compilation options for an XLA computation. // // TODO(hyeontaek): Move this class out of pjrt_ifrt. diff --git a/third_party/xla/xla/python/pjrt_ifrt/xla_executable_impl_test_lib.cc b/third_party/xla/xla/python/pjrt_ifrt/xla_executable_impl_test_lib.cc index 077b635285bd12..e52c9c2a169290 100644 --- a/third_party/xla/xla/python/pjrt_ifrt/xla_executable_impl_test_lib.cc +++ b/third_party/xla/xla/python/pjrt_ifrt/xla_executable_impl_test_lib.cc @@ -34,6 +34,7 @@ limitations under the License. 
#include "xla/python/ifrt/device.h" #include "xla/python/ifrt/dtype.h" #include "xla/python/ifrt/executable.h" +#include "xla/python/ifrt/hlo/hlo_program.h" #include "xla/python/ifrt/memory.h" #include "xla/python/ifrt/shape.h" #include "xla/python/ifrt/sharding.h" @@ -97,7 +98,7 @@ absl::StatusOr> CompileOnDevices( } } } - return compiler->Compile(std::make_unique(*module), + return compiler->Compile(std::make_unique(*module), std::move(compile_options)); } diff --git a/third_party/xla/xla/python/py_client.cc b/third_party/xla/xla/python/py_client.cc index 5a67786bbe39d3..2321ec18abd3c9 100644 --- a/third_party/xla/xla/python/py_client.cc +++ b/third_party/xla/xla/python/py_client.cc @@ -64,6 +64,7 @@ limitations under the License. #include "xla/python/ifrt/device.h" #include "xla/python/ifrt/dtype.h" #include "xla/python/ifrt/executable.h" +#include "xla/python/ifrt/hlo/hlo_program.h" #include "xla/python/ifrt/host_callback.h" #include "xla/python/ifrt/memory.h" #include "xla/python/nb_absl_span.h" // IWYU pragma: keep @@ -437,7 +438,7 @@ PyClient::CompileIfrtProgram( TF_ASSIGN_OR_RETURN(mlir::OwningOpRef module, ParseMlirModuleString(mlir_module, context)); return CompileIfrtProgram( - client, std::make_unique(module.get()), + client, std::make_unique(module.get()), MakeIfrtCompileOptions(std::move(options), std::move(host_callbacks))); } diff --git a/third_party/xla/xla/python/py_program.cc b/third_party/xla/xla/python/py_program.cc index 230606f879038f..e85b0a5fda8770 100644 --- a/third_party/xla/xla/python/py_program.cc +++ b/third_party/xla/xla/python/py_program.cc @@ -34,6 +34,7 @@ limitations under the License. #include "xla/pjrt/pjrt_executable.h" #include "xla/pjrt/status_casters.h" #include "xla/python/ifrt/compiler.h" +#include "xla/python/ifrt/hlo/hlo_program.h" #include "xla/python/ifrt/host_callback.h" #include "xla/python/ifrt/plugin_program.h" #include "xla/python/pjrt_ifrt/xla_compiler.h" @@ -65,23 +66,23 @@ MakePluginCompileOptions() { return std::make_unique(); } -absl::StatusOr> MakeXlaProgram( +absl::StatusOr> MakeHloProgram( absl::string_view mlir_module) { auto context = std::make_unique(); TF_ASSIGN_OR_RETURN(mlir::OwningOpRef module, ParseMlirModuleString(mlir_module, *context)); - return std::make_unique(std::move(context), + return std::make_unique(std::move(context), std::move(module)); } -absl::StatusOr> MakeXlaProgramFromString( +absl::StatusOr> MakeHloProgramFromString( std::string mlir_module) { - return MakeXlaProgram(mlir_module); + return MakeHloProgram(mlir_module); } -absl::StatusOr> MakeXlaProgramFromBytes( +absl::StatusOr> MakeHloProgramFromBytes( nb::bytes mlir_module) { - return MakeXlaProgram( + return MakeHloProgram( absl::string_view(mlir_module.c_str(), mlir_module.size())); } @@ -109,10 +110,10 @@ void BuildIfrtProgramsSubmodule(nanobind::module_& m) { nb::class_ ifrt_compile_options_base_class( sub_module, "CompileOptions"); sub_module - .def("make_xla_program", - xla::ValueOrThrowWrapper(MakeXlaProgramFromString)) - .def("make_xla_program", - xla::ValueOrThrowWrapper(MakeXlaProgramFromBytes)) + .def("make_hlo_program", + xla::ValueOrThrowWrapper(MakeHloProgramFromString)) + .def("make_hlo_program", + xla::ValueOrThrowWrapper(MakeHloProgramFromBytes)) .def("make_plugin_program", xla::ValueOrThrowWrapper(MakePluginProgramFromString)) .def("make_plugin_program", diff --git a/third_party/xla/xla/python/xla_client_test.py b/third_party/xla/xla/python/xla_client_test.py index afcacfdeafe80e..71549ecb3e2979 100644 --- 
a/third_party/xla/xla/python/xla_client_test.py +++ b/third_party/xla/xla/python/xla_client_test.py @@ -3076,15 +3076,15 @@ def testPluginProgramDoesNotCompile(self): program = xla_client.ifrt_programs.make_plugin_program("foobar") options = xla_client.ifrt_programs.make_plugin_compile_options() with self.assertRaisesRegex( - xla_client.XlaRuntimeError, "PjRtCompiler requires an XlaProgram" + xla_client.XlaRuntimeError, "PjRtCompiler requires an HloProgram" ): self.backend.compile_ifrt_program(program, options) @unittest.skipIf(pathways, "does not work with non-ifrt legacy pathways") - def testXlaProgramViaIfrtProgram(self): + def testHloProgramViaIfrtProgram(self): c = self._NewComputation() ops.Iota(c, xla_client.PrimitiveType.F32, 10) - program = xla_client.ifrt_programs.make_xla_program( + program = xla_client.ifrt_programs.make_hlo_program( xla_computation_to_mlir_module(c.build()) ) options = xla_client.ifrt_programs.make_xla_compile_options( diff --git a/third_party/xla/xla/python/xla_extension/ifrt_programs.pyi b/third_party/xla/xla/python/xla_extension/ifrt_programs.pyi index 58b0996b75797b..830df593b6e4fc 100644 --- a/third_party/xla/xla/python/xla_extension/ifrt_programs.pyi +++ b/third_party/xla/xla/python/xla_extension/ifrt_programs.pyi @@ -21,7 +21,7 @@ class Program: ... class CompileOptions: ... -def make_xla_program(mlir_module: Union[str, bytes]) -> Program: ... +def make_hlo_program(mlir_module: Union[str, bytes]) -> Program: ... def make_plugin_program(data: Union[str, bytes]) -> Program: ... From 8c004713d87c04183b4e3a6dc44838aca5bd488c Mon Sep 17 00:00:00 2001 From: Kuy Mainwaring Date: Mon, 13 May 2024 16:08:23 -0700 Subject: [PATCH 072/478] [XLA:GPU] Clang-tidy fixes for xla/service/gpu/fusions/copy.h PiperOrigin-RevId: 633361839 --- third_party/xla/xla/service/gpu/fusions/copy.h | 1 + 1 file changed, 1 insertion(+) diff --git a/third_party/xla/xla/service/gpu/fusions/copy.h b/third_party/xla/xla/service/gpu/fusions/copy.h index 00b9ce4506a5a3..53fd1fb3ddfd31 100644 --- a/third_party/xla/xla/service/gpu/fusions/copy.h +++ b/third_party/xla/xla/service/gpu/fusions/copy.h @@ -15,6 +15,7 @@ limitations under the License. 
#ifndef XLA_SERVICE_GPU_FUSIONS_COPY_H_ #define XLA_SERVICE_GPU_FUSIONS_COPY_H_ +#include #include #include "absl/status/statusor.h" From 96737ab5a239b663871eea6d55581ac8b2182ef2 Mon Sep 17 00:00:00 2001 From: Kuangyuan Chen Date: Mon, 13 May 2024 16:08:46 -0700 Subject: [PATCH 073/478] Stop generating corert const ops in tf_to_tfrt PiperOrigin-RevId: 633361967 --- .../tfrt/tests/tf_to_corert/const_tensor.mlir | 6 ++--- .../tfrt/tests/tf_to_corert/control_flow.mlir | 4 +-- .../tf_to_corert/decompose_resource_op.mlir | 3 +-- .../mlir/tfrt/transforms/tf_to_tfrt.cc | 25 ++++++++++--------- 4 files changed, 18 insertions(+), 20 deletions(-) diff --git a/tensorflow/compiler/mlir/tfrt/tests/tf_to_corert/const_tensor.mlir b/tensorflow/compiler/mlir/tfrt/tests/tf_to_corert/const_tensor.mlir index b208fe390acc3f..6596d650889384 100644 --- a/tensorflow/compiler/mlir/tfrt/tests/tf_to_corert/const_tensor.mlir +++ b/tensorflow/compiler/mlir/tfrt/tests/tf_to_corert/const_tensor.mlir @@ -9,12 +9,12 @@ func.func @string_tensor() -> (tensor<0x!tf_type.string>, tensor<7x!tf_type.stri func.return %0, %1 : tensor<0x!tf_type.string>, tensor<7x!tf_type.string> } -// Convert tf.Const to corert.const_dense_tensor only on cpu device +// Convert tf.Const to tfrt_fallback_async.const_dense_tensor only on cpu device // CHECK-LABEL: func @dense_tensor func.func @dense_tensor() -> tensor<4xui64> { - // CHECK: corert.const_dense_tensor dense<[1, 2, 3, 4]> : tensor<4xui64> + // CHECK: tfrt_fallback_async.const_dense_tensor dense<[1, 2, 3, 4]> : tensor<4xui64> %0 = "tf.Const"() {value = dense<[1, 2, 3, 4]> : tensor<4xui64>} : () -> tensor<4xui64> - // CHECK: corert.const_dense_tensor dense<1.000000e+00> : tensor<1xbf16> + // CHECK: tfrt_fallback_async.const_dense_tensor dense<1.000000e+00> : tensor<1xbf16> %1 = "tf.Const"() {device = "/device:CPU:0", value = dense<[1.0]> : tensor<1xbf16>} : () -> tensor<4xbf16> // CHECK: corert.executeop({{.*}}) "tf.Const"() {dtype = ui64, value = dense<[1, 2, 3, 4]> : tensor<4xui64>} : 1 %2 = "tf.Const"() {device = "/device:GPU:0", value = dense<[1, 2, 3, 4]> : tensor<4xui64>} : () -> tensor<4xui64> diff --git a/tensorflow/compiler/mlir/tfrt/tests/tf_to_corert/control_flow.mlir b/tensorflow/compiler/mlir/tfrt/tests/tf_to_corert/control_flow.mlir index e3562b286c47f3..ad3232042ca5e7 100644 --- a/tensorflow/compiler/mlir/tfrt/tests/tf_to_corert/control_flow.mlir +++ b/tensorflow/compiler/mlir/tfrt/tests/tf_to_corert/control_flow.mlir @@ -56,10 +56,8 @@ func.func @while_body_add2(%arg0: tensor) -> tensor { // CHECK-LABEL: func @while_test // CHECK-SAME: ([[ARG0:%.+]]: !tfrt.chain) -> (!tfrt.chain, !tfrt_fallback.tf_tensor) func.func @while_test() -> (tensor) { - // CHECK: [[CONST_TH:%.*]] = corert.const_dense_tensor dense<0> : tensor + // CHECK: [[CONST:%.*]] = tfrt_fallback_async.const_dense_tensor dense<0> : tensor %0 = "tf.Const"() {device = "/device:CPU:0", value = dense<0> : tensor} : () -> tensor - // CHECK: [[CONST:%.*]] = tfrt_fallback_async.corert_tensorhandle_to_fallback_tensor [[CONST_TH]] - // CHECK: (!corert.tensorhandle) -> (!tfrt_fallback.tf_tensor) // CHECK: [[pred_res:%.*]]:2 = tfrt.call @"while_cond_lt9/tfrt_predicate"([[ARG0]], [[CONST]]) : (!tfrt.chain, !tfrt_fallback.tf_tensor) -> (!tfrt.chain, i1) // CHECK: [[while_res:%.]]:2 = tfrt.while [[pred_res]]#1 @"while_body_add2/tfrt_body_1"([[pred_res]]#0, [[CONST]]) // CHECK-SAME: (!tfrt.chain, !tfrt_fallback.tf_tensor) -> (!tfrt.chain, !tfrt_fallback.tf_tensor) diff --git 
a/tensorflow/compiler/mlir/tfrt/tests/tf_to_corert/decompose_resource_op.mlir b/tensorflow/compiler/mlir/tfrt/tests/tf_to_corert/decompose_resource_op.mlir index ed308d02ad6e0b..ff0f0e7dbfd2cd 100644 --- a/tensorflow/compiler/mlir/tfrt/tests/tf_to_corert/decompose_resource_op.mlir +++ b/tensorflow/compiler/mlir/tfrt/tests/tf_to_corert/decompose_resource_op.mlir @@ -5,8 +5,7 @@ module attributes {tf.versions = {bad_consumers = [], min_consumer = 12 : i32, p // CHECK-LABEL: func @gather // CHECK-SAME: ([[in_chain:%.*]]: !tfrt.chain // CHECK-SAME: [[arg0:%.*]]: !tfrt_fallback.tf_tensor, [[arg1:%.*]]: !tfrt_fallback.tf_tensor) -// CHECK: [[const_th:%.*]] = corert.const_dense_tensor -// CHECK-NEXT: [[const:%.*]] = tfrt_fallback_async.corert_tensorhandle_to_fallback_tensor [[const_th]] {device = "/job:localhost/replica:0/task:0/device:CPU:0"} +// CHECK: [[const:%.*]] = tfrt_fallback_async.const_dense_tensor // CHECK-NEXT: [[out_chain:%.*]], [[value:%.*]] = tfrt_fallback_async.executeop.seq([[in_chain]]) key(0) cost({{.*}}) device("/job:localhost/replica:0/task:0/device:CPU:0") "tf.ReadVariableOp"({{.*}}) // CHECK-NEXT: [[res:%.*]] = tfrt_fallback_async.executeop key(1) cost({{.*}}) device("/job:localhost/replica:0/task:0/device:CPU:0") "tf.GatherV2"([[value]], {{.*}}, [[const]]) // CHECK-NEXT: tfrt.return [[out_chain]], [[res]] : !tfrt.chain, !tfrt_fallback.tf_tensor diff --git a/tensorflow/compiler/mlir/tfrt/transforms/tf_to_tfrt.cc b/tensorflow/compiler/mlir/tfrt/transforms/tf_to_tfrt.cc index 024baa43c73132..f090745e0ae1c4 100644 --- a/tensorflow/compiler/mlir/tfrt/transforms/tf_to_tfrt.cc +++ b/tensorflow/compiler/mlir/tfrt/transforms/tf_to_tfrt.cc @@ -738,11 +738,11 @@ class FallbackBatchFunctionOpConversion // Lower a tf.Const op that creates a string tensor to a native // corert.create_string_tensor op. -class CoreRTConstDenseTensorOpConversion +class FallbackConstDenseTensorOpConversion : public mlir::OpConversionPattern { public: - CoreRTConstDenseTensorOpConversion(mlir::MLIRContext *context, - CoreRTConverter *corert_converter) + FallbackConstDenseTensorOpConversion(mlir::MLIRContext *context, + CoreRTConverter *corert_converter) : mlir::OpConversionPattern(context, kCoreRTBenefit), corert_converter_(*corert_converter) {} @@ -756,8 +756,8 @@ class CoreRTConstDenseTensorOpConversion if (auto parsed_device_name = corert_converter_.ParseDeviceName(op)) if (parsed_device_name->device_type != DEVICE_CPU) return failure(); - auto new_op = rewriter.create( - op.getLoc(), corert_converter_.tensor_handle_type(), + auto new_op = rewriter.create( + op.getLoc(), rewriter.getType(), mlir::cast(op.getValue())); rewriter.replaceOp(op, new_op->getResult(0)); return success(); @@ -860,11 +860,11 @@ class TFRTFuncOpSignatureConversion // Lower a tf.Const op that creates a string tensor to a native // corert.create_string_tensor op. 
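 // (Note: as of this change the pattern below emits a tfrt_fallback_async
 // const string tensor op rather than a corert one; the comment above is
 // retained verbatim from the original code.)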
-class CoreRTConstStringTensorOpConversion +class FallbackConstStringTensorOpConversion : public mlir::OpConversionPattern { public: - CoreRTConstStringTensorOpConversion(mlir::MLIRContext *context, - CoreRTConverter *corert_converter) + FallbackConstStringTensorOpConversion(mlir::MLIRContext *context, + CoreRTConverter *corert_converter) : mlir::OpConversionPattern(context, kCoreRTBenefit), corert_converter_(*corert_converter) {} @@ -890,8 +890,8 @@ class CoreRTConstStringTensorOpConversion for (auto dim : shape) dims.push_back(rewriter.getIntegerAttr(i64_type, dim)); - auto new_op = rewriter.create( - op.getLoc(), corert_converter_.tensor_handle_type(), + auto new_op = rewriter.create( + op.getLoc(), rewriter.getType(), rewriter.getArrayAttr(dims), rewriter.getArrayAttr(values)); rewriter.replaceOp(op, new_op.getResult()); @@ -1532,8 +1532,9 @@ void PopulateTFToTFRTConversionPatterns( // Here we use specialized patterns for tf.Const on CPU as it is incorrect to // use ExecuteOp pattern to convert string tensor attribute. - patterns->add(context, corert_converter); + patterns->add(context, + corert_converter); } // Lower TF dialect MLIR to TFRT dialect. From 53cc802c4002a59aa11032423c927494282039f7 Mon Sep 17 00:00:00 2001 From: Yang Chen Date: Mon, 13 May 2024 16:12:02 -0700 Subject: [PATCH 074/478] #tf-buildcop Fix test timeout. Error: ``` //tensorflow/core/data/service/snapshot:snapshot_stream_writer_checkpoint_test TIMEOUT in 3 out of 3 in 96.0s Stats over 3 runs: max = 96.0s, min = 66.2s, avg = 79.4s, dev = 12.4s tensorflow/core/data/service/snapshot/snapshot_stream_writer_checkpoint_test/test.log tensorflow/core/data/service/snapshot/snapshot_stream_writer_checkpoint_test/test_attempts/attempt_1.log tensorflow/core/data/service/snapshot/snapshot_stream_writer_checkpoint_test/test_attempts/attempt_2.log ``` PiperOrigin-RevId: 633362798 --- tensorflow/core/data/service/snapshot/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/data/service/snapshot/BUILD b/tensorflow/core/data/service/snapshot/BUILD index 523bfebc44dcb5..40b5cbaa6873aa 100644 --- a/tensorflow/core/data/service/snapshot/BUILD +++ b/tensorflow/core/data/service/snapshot/BUILD @@ -462,7 +462,7 @@ cc_library( tf_cc_test( name = "snapshot_stream_writer_checkpoint_test", - size = "small", + size = "medium", srcs = ["snapshot_stream_writer_checkpoint_test.cc"], deps = [ ":path_utils", From 7fd8e285c203dde472a2671a7cda8ba55bcb49d6 Mon Sep 17 00:00:00 2001 From: Kuy Mainwaring Date: Mon, 13 May 2024 16:12:02 -0700 Subject: [PATCH 075/478] [XLA:GPU] Clang-tidy fixes for xla/service/gpu/fusions/mlir/mlir_fusion_emitter.cc PiperOrigin-RevId: 633362802 --- .../xla/xla/service/gpu/fusions/mlir/mlir_fusion_emitter.cc | 1 + .../xla/xla/service/gpu/fusions/mlir/mlir_fusion_emitter.h | 1 + 2 files changed, 2 insertions(+) diff --git a/third_party/xla/xla/service/gpu/fusions/mlir/mlir_fusion_emitter.cc b/third_party/xla/xla/service/gpu/fusions/mlir/mlir_fusion_emitter.cc index 41108b774751b9..7daca77a7bd37a 100644 --- a/third_party/xla/xla/service/gpu/fusions/mlir/mlir_fusion_emitter.cc +++ b/third_party/xla/xla/service/gpu/fusions/mlir/mlir_fusion_emitter.cc @@ -22,6 +22,7 @@ limitations under the License. 
#include #include #include +#include #include "absl/algorithm/container.h" #include "absl/container/flat_hash_map.h" diff --git a/third_party/xla/xla/service/gpu/fusions/mlir/mlir_fusion_emitter.h b/third_party/xla/xla/service/gpu/fusions/mlir/mlir_fusion_emitter.h index 1c15af1f901d58..1ea02ac98cd120 100644 --- a/third_party/xla/xla/service/gpu/fusions/mlir/mlir_fusion_emitter.h +++ b/third_party/xla/xla/service/gpu/fusions/mlir/mlir_fusion_emitter.h @@ -18,6 +18,7 @@ limitations under the License. #include #include #include +#include #include "absl/container/flat_hash_set.h" #include "absl/status/status.h" From a7f0e0261b48393c207782f89c0cf991e45152b2 Mon Sep 17 00:00:00 2001 From: David Dunleavy Date: Mon, 13 May 2024 16:12:57 -0700 Subject: [PATCH 076/478] Use `--nobuild_tests_only` even on ARM builds of XLA PiperOrigin-RevId: 633363011 --- third_party/xla/.kokoro/linux/build.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/third_party/xla/.kokoro/linux/build.sh b/third_party/xla/.kokoro/linux/build.sh index 2a72099e5f5148..81251bd3f44a6c 100644 --- a/third_party/xla/.kokoro/linux/build.sh +++ b/third_party/xla/.kokoro/linux/build.sh @@ -62,7 +62,7 @@ if is_linux_gpu_job ; then UNSUPPORTED_GPU_TAGS="$(echo -requires-gpu-sm{80,86,89,90}{,-only})" TAGS_FILTER="${TAGS_FILTER},${UNSUPPORTED_GPU_TAGS// /,}" - ADDITIONAL_FLAGS="$ADDITIONAL_FLAGS --nobuild_tests_only --run_under=//tools/ci_build/gpu_build:parallel_gpu_execute" + ADDITIONAL_FLAGS="$ADDITIONAL_FLAGS --run_under=//tools/ci_build/gpu_build:parallel_gpu_execute" RBE_FLAGS="--config=rbe_linux_cuda_nvcc --jobs=150" echo "***NOTE: nvidia-smi lists the highest CUDA version the driver supports, which may be different than the version of CUDA actually used!!***" nvidia-smi @@ -79,17 +79,17 @@ else RBE_FLAGS="--config=rbe_cross_compile_linux_arm64_xla --jobs=150" else RBE_FLAGS="--config=rbe_linux_cpu --jobs=150" - ADDITIONAL_FLAGS="$ADDITIONAL_FLAGS --nobuild_tests_only" fi fi # Build & test XLA docker exec xla bazel \ test \ - --build_tag_filters=$TAGS_FILTER \ + --build_tag_filters=$TAGS_FILTER \ --test_tag_filters=$TAGS_FILTER \ --test_output=errors \ --keep_going \ + --nobuild_tests_only \ --features=layering_check \ --profile=/tf/pkg/profile.json.gz \ --flaky_test_attempts=3 \ From e4a0ecea4889d268e5f44f4532348df79c42a8f8 Mon Sep 17 00:00:00 2001 From: Kuy Mainwaring Date: Mon, 13 May 2024 16:21:02 -0700 Subject: [PATCH 077/478] [XLA:GPU] Clang-tidy fixes for xla/service/gpu/runtime/convolution_thunk.cc PiperOrigin-RevId: 633365188 --- third_party/xla/xla/service/gpu/runtime/BUILD | 5 +++-- .../xla/xla/service/gpu/runtime/convolution_thunk.cc | 11 ++++++++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/third_party/xla/xla/service/gpu/runtime/BUILD b/third_party/xla/xla/service/gpu/runtime/BUILD index d5ad69a37b933c..d0b940e88b523e 100644 --- a/third_party/xla/xla/service/gpu/runtime/BUILD +++ b/third_party/xla/xla/service/gpu/runtime/BUILD @@ -512,18 +512,19 @@ cc_library( "TENSORFLOW_USE_ROCM=1", ]), deps = [ + ":thunk", "//xla:util", "//xla/service:buffer_assignment", "//xla/service/gpu:gpu_conv_runner", - "//xla/service/gpu:stream_executor_util", - "//xla/service/gpu/runtime:thunk", "//xla/stream_executor", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:inlined_vector", + "@com_google_absl//absl/log:check", "@com_google_absl//absl/status", "@com_google_absl//absl/synchronization", 
"@com_google_absl//absl/types:span", + "@local_tsl//tsl/platform:errors", ], ) diff --git a/third_party/xla/xla/service/gpu/runtime/convolution_thunk.cc b/third_party/xla/xla/service/gpu/runtime/convolution_thunk.cc index 6e8158d866aaf5..b7bba0fab3b418 100644 --- a/third_party/xla/xla/service/gpu/runtime/convolution_thunk.cc +++ b/third_party/xla/xla/service/gpu/runtime/convolution_thunk.cc @@ -15,17 +15,26 @@ limitations under the License. #include "xla/service/gpu/runtime/convolution_thunk.h" +#include #include #include +#include +#include #include "absl/container/inlined_vector.h" +#include "absl/log/check.h" #include "absl/status/status.h" +#include "absl/synchronization/mutex.h" +#include "absl/types/span.h" #include "xla/service/buffer_assignment.h" #include "xla/service/gpu/gpu_conv_runner.h" -#include "xla/service/gpu/stream_executor_util.h" +#include "xla/service/gpu/runtime/thunk.h" +#include "xla/stream_executor/device_memory.h" +#include "xla/stream_executor/dnn.h" #include "xla/stream_executor/scratch_allocator.h" #include "xla/stream_executor/stream_executor.h" #include "xla/util.h" +#include "tsl/platform/errors.h" namespace xla { namespace gpu { From 60645902f1745b90232cce4e0116faf87e05948e Mon Sep 17 00:00:00 2001 From: Siqiao Wu Date: Mon, 13 May 2024 16:38:50 -0700 Subject: [PATCH 078/478] Add some internal change. PiperOrigin-RevId: 633369641 --- tensorflow/core/tfrt/ifrt/BUILD | 20 ++ .../ifrt/ifrt_restore_tensor_registry_test.cc | 190 ++++++++++++++++++ 2 files changed, 210 insertions(+) create mode 100644 tensorflow/core/tfrt/ifrt/ifrt_restore_tensor_registry_test.cc diff --git a/tensorflow/core/tfrt/ifrt/BUILD b/tensorflow/core/tfrt/ifrt/BUILD index e0a3b4ed4553bc..9db13a6387a286 100644 --- a/tensorflow/core/tfrt/ifrt/BUILD +++ b/tensorflow/core/tfrt/ifrt/BUILD @@ -299,6 +299,26 @@ cc_library( ], ) +tf_cc_test( + name = "ifrt_restore_tensor_registry_test", + srcs = ["ifrt_restore_tensor_registry_test.cc"], + tags = ["no_oss"], + deps = [ + ":ifrt_restore_tensor_registry", + "//tensorflow/compiler/mlir/tfrt/transforms/ifrt:ifrt_types", + "//tensorflow/core:framework", + "//tensorflow/core/framework:tensor", + "//tensorflow/core/framework:tensor_testutil", + "//tensorflow/core/framework:types_proto_cc", + "@com_google_absl//absl/status", + "@com_google_googletest//:gtest_main", + "@local_tsl//tsl/lib/core:status_test_util", + "@local_tsl//tsl/platform:status_matchers", + "@local_tsl//tsl/platform:statusor", + "@local_xla//xla/python/ifrt", + ], +) + tf_cc_test( name = "ifrt_loaded_variable_utils_test", srcs = ["ifrt_loaded_variable_utils_test.cc"], diff --git a/tensorflow/core/tfrt/ifrt/ifrt_restore_tensor_registry_test.cc b/tensorflow/core/tfrt/ifrt/ifrt_restore_tensor_registry_test.cc new file mode 100644 index 00000000000000..de0a27aecc4104 --- /dev/null +++ b/tensorflow/core/tfrt/ifrt/ifrt_restore_tensor_registry_test.cc @@ -0,0 +1,190 @@ +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/
+#include "tensorflow/core/tfrt/ifrt/ifrt_restore_tensor_registry.h"
+
+#include <memory>
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+#include "absl/status/status.h"
+#include "tensorflow/compiler/mlir/tfrt/transforms/ifrt/ifrt_types.h"
+#include "xla/python/ifrt/future.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/tensor_testutil.h"
+#include "tensorflow/core/framework/types.pb.h"
+#include "tsl/lib/core/status_test_util.h"
+#include "tsl/platform/status_matchers.h"
+#include "tsl/platform/statusor.h"
+
+using tsl::testing::IsOk;
+using tsl::testing::StatusIs;
+
+namespace tensorflow {
+namespace ifrt_serving {
+namespace {
+
+TEST(IfrtRestoreTensorRegistryTest, RetrieveNonRegisteredTensorFails) {
+  IfrtRestoreTensorRegistry registry;
+  EXPECT_THAT(registry.GetRestoredTensor("input_tensor_1").Await(),
+              StatusIs(absl::StatusCode::kNotFound));
+}
+
+TEST(IfrtRestoreTensorRegistryTest,
+     RetrieveNonRegisteredTensorDTypeAndShapeFails) {
+  IfrtRestoreTensorRegistry registry;
+  EXPECT_THAT(registry.GetDtypeAndShape("input_tensor_1"),
+              StatusIs(absl::StatusCode::kNotFound));
+}
+
+TEST(IfrtRestoreTensorRegistryTest, SetNonExistedTensorAsUsedByHostFails) {
+  IfrtRestoreTensorRegistry registry;
+  EXPECT_THAT(registry.SetUsedByHost("input_tensor_1"),
+              StatusIs(absl::StatusCode::kNotFound));
+}
+
+TEST(IfrtRestoreTensorRegistryTest, RegisteredExistedTensorFails) {
+  auto input_tensor =
+      test::AsTensor<int32_t>({1, 2, 3, 4}, tensorflow::TensorShape({2, 2}));
+  auto promise = xla::ifrt::Future<tensorflow::Tensor>::CreatePromise();
+  auto future = xla::ifrt::Future<tensorflow::Tensor>(promise);
+
+  IfrtRestoreTensorRegistry::RestoredTensorInfo restored_tensor_info = {
+      .used_by_host = false,
+      .dtype_and_shape =
+          {
+              .dtype = DT_INT32,
+              .shape = tensorflow::TensorShape({2, 2}),
+          },
+      .tensor_future = future};
+  IfrtRestoreTensorRegistry registry;
+  EXPECT_THAT(registry.TryRegister("input_tensor_2", restored_tensor_info),
+              IsOk());
+  promise.Set(input_tensor);
+  EXPECT_THAT(registry.TryRegister("input_tensor_2", restored_tensor_info),
+              StatusIs(absl::StatusCode::kAlreadyExists));
+}
+
+TEST(IfrtRestoreTensorRegistryTest, SetTensorAsUsedByHost) {
+  auto promise = xla::ifrt::Future<tensorflow::Tensor>::CreatePromise();
+  auto future = xla::ifrt::Future<tensorflow::Tensor>(promise);
+  IfrtRestoreTensorRegistry::RestoredTensorInfo restored_tensor_info = {
+      .used_by_host = false,
+      .dtype_and_shape =
+          {
+              .dtype = DT_INT32,
+              .shape = tensorflow::TensorShape({2, 2}),
+          },
+      .tensor_future = future};
+  IfrtRestoreTensorRegistry registry;
+  EXPECT_THAT(registry.TryRegister("input_tensor_1", restored_tensor_info),
+              IsOk());
+  EXPECT_THAT(registry.SetUsedByHost("input_tensor_1"), IsOk());
+}
+
+TEST(IfrtRestoreTensorRegistryTest, RegisteredTensorCanBeRetrieved) {
+  auto input_tensor =
+      test::AsTensor<int32_t>({1, 2, 3, 4}, tensorflow::TensorShape({2, 2}));
+  auto promise = xla::ifrt::Future<tensorflow::Tensor>::CreatePromise();
+  auto future = xla::ifrt::Future<tensorflow::Tensor>(promise);
+
+  IfrtRestoreTensorRegistry::RestoredTensorInfo restored_tensor_info = {
+      .used_by_host = false,
+      .dtype_and_shape =
+          {
+              .dtype = DT_INT32,
+              .shape = tensorflow::TensorShape({2, 2}),
+          },
+      .tensor_future = future};
+  IfrtRestoreTensorRegistry registry;
+  EXPECT_THAT(registry.TryRegister("input_tensor_1", restored_tensor_info),
+              IsOk());
+  promise.Set(input_tensor);
+  TF_ASSERT_OK_AND_ASSIGN(tensorflow::Tensor retrieved,
+                          registry.GetRestoredTensor("input_tensor_1").Await());
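+  // Await() resolves immediately here because the promise was fulfilled
+  // above; the retrieved tensor should equal the one the promise was set with.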
+  test::ExpectEqual(retrieved, input_tensor);
+  TF_ASSERT_OK_AND_ASSIGN(DtypeAndShape dtype_and_shape,
+                          registry.GetDtypeAndShape("input_tensor_1"));
+  EXPECT_TRUE(
+      dtype_and_shape.shape.IsSameSize(tensorflow::TensorShape({2, 2})));
+  EXPECT_EQ(dtype_and_shape.dtype, DT_INT32);
+}
+
+TEST(IfrtRestoreTensorRegistryTest,
+     RegisteredTensorDTypeAndShapeCanBeRetrieved) {
+  auto input_tensor =
+      test::AsTensor<int32_t>({1, 2, 3, 4}, tensorflow::TensorShape({2, 2}));
+  auto promise = xla::ifrt::Future<tensorflow::Tensor>::CreatePromise();
+  auto future = xla::ifrt::Future<tensorflow::Tensor>(promise);
+
+  IfrtRestoreTensorRegistry::RestoredTensorInfo restored_tensor_info = {
+      .used_by_host = false,
+      .dtype_and_shape =
+          {
+              .dtype = DT_INT32,
+              .shape = tensorflow::TensorShape({2, 2}),
+          },
+      .tensor_future = future};
+  IfrtRestoreTensorRegistry registry;
+  EXPECT_THAT(registry.TryRegister("input_tensor_1", restored_tensor_info),
+              IsOk());
+  TF_ASSERT_OK_AND_ASSIGN(DtypeAndShape dtype_and_shape,
+                          registry.GetDtypeAndShape("input_tensor_1"));
+  EXPECT_TRUE(
+      dtype_and_shape.shape.IsSameSize(tensorflow::TensorShape({2, 2})));
+  EXPECT_EQ(dtype_and_shape.dtype, DT_INT32);
+}
+
+TEST(IfrtRestoreTensorRegistryTest, FreezeTensorRegistry) {
+  auto input_tensor =
+      test::AsTensor<int32_t>({1, 2, 3, 4}, tensorflow::TensorShape({2, 2}));
+  auto promise1 = xla::ifrt::Future<tensorflow::Tensor>::CreatePromise();
+  auto future1 = xla::ifrt::Future<tensorflow::Tensor>(promise1);
+  auto promise2 = xla::ifrt::Future<tensorflow::Tensor>::CreatePromise();
+  auto future2 = xla::ifrt::Future<tensorflow::Tensor>(promise2);
+
+  IfrtRestoreTensorRegistry::RestoredTensorInfo restored_tensor_info1 = {
+      .used_by_host = false,
+      .dtype_and_shape =
+          {
+              .dtype = DT_INT32,
+              .shape = tensorflow::TensorShape({2, 2}),
+          },
+      .tensor_future = future1};
+  IfrtRestoreTensorRegistry::RestoredTensorInfo restored_tensor_info2 = {
+      .used_by_host = true,
+      .dtype_and_shape =
+          {
+              .dtype = DT_INT32,
+              .shape = tensorflow::TensorShape({2, 2}),
+          },
+      .tensor_future = future2};
+  IfrtRestoreTensorRegistry registry;
+  TF_ASSERT_OK(registry.TryRegister("input_tensor_1", restored_tensor_info1));
+  TF_ASSERT_OK(registry.TryRegister("input_tensor_2", restored_tensor_info2));
+  promise1.Set(input_tensor);
+  promise2.Set(input_tensor);
+  registry.Freeze();
+  // Tensor with `used_by_host` set to false will be freed after freeze.
+  EXPECT_THAT(registry.GetRestoredTensor("input_tensor_1").Await(),
+              StatusIs(absl::StatusCode::kUnavailable));
+  // Tensor with `used_by_host` set to true will be kept after freeze.
+  TF_ASSERT_OK_AND_ASSIGN(tensorflow::Tensor retrieved,
+                          registry.GetRestoredTensor("input_tensor_2").Await());
+  test::ExpectEqual(retrieved, input_tensor);
+}
+}  // namespace
+}  // namespace ifrt_serving
+}  // namespace tensorflow

From 0cb4750928311f7d901f99739377f9a62c5f1d92 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 13 May 2024 16:54:33 -0700
Subject: [PATCH 079/478] Migrate coord agent and service to use absl
 mutex/condvar libraries directly + some clang fixes.
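An illustrative sketch of the core mapping applied throughout this change (not
taken from the patch; the class and member names below are hypothetical):
`mutex_lock` becomes `absl::MutexLock`, `tf_shared_lock` becomes
`absl::ReaderMutexLock`, `condition_variable::wait_for` becomes
`absl::CondVar::WaitWithTimeout`, and `notify_all` becomes `SignalAll`.

```
#include "absl/base/thread_annotations.h"
#include "absl/synchronization/mutex.h"
#include "absl/time/time.h"

class StalenessChecker {  // hypothetical class, for illustration only
 public:
  void Loop() {
    while (true) {
      absl::MutexLock l(&mu_);  // was: mutex_lock l(mu_);
      // was: cv_.wait_for(l, std::chrono::seconds(1));
      cv_.WaitWithTimeout(&mu_, absl::Seconds(1));
      if (shutting_down_) return;
      // ... periodic staleness checks would go here ...
    }
  }

  void Shutdown() {
    absl::MutexLock l(&mu_);
    shutting_down_ = true;
    cv_.SignalAll();  // was: cv_.notify_all();
  }

 private:
  absl::Mutex mu_;
  absl::CondVar cv_;
  bool shutting_down_ ABSL_GUARDED_BY(mu_) = false;
};
```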
PiperOrigin-RevId: 633373388 --- .../distributed_runtime/coordination/BUILD | 10 ++- .../coordination/coordination_service.cc | 80 ++++++++++--------- .../coordination/coordination_service.h | 7 +- .../coordination_service_agent.cc | 53 ++++++------ .../coordination/coordination_service_agent.h | 5 +- 5 files changed, 80 insertions(+), 75 deletions(-) diff --git a/third_party/xla/xla/tsl/distributed_runtime/coordination/BUILD b/third_party/xla/xla/tsl/distributed_runtime/coordination/BUILD index 33cf2f09025912..bf67649bb5474f 100644 --- a/third_party/xla/xla/tsl/distributed_runtime/coordination/BUILD +++ b/third_party/xla/xla/tsl/distributed_runtime/coordination/BUILD @@ -49,11 +49,13 @@ cc_library( hdrs = ["coordination_service.h"], deps = [ ":coordination_client", + "@com_google_absl//absl/log", "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", "@com_google_absl//absl/time", + "@local_tsl//tsl/platform:macros", "@local_tsl//tsl/platform:status", - "@local_tsl//tsl/platform:statusor", "@local_tsl//tsl/protobuf:coordination_config_proto_cc", ], ) @@ -72,15 +74,16 @@ tsl_gpu_library( "//xla/tsl/util:device_name_utils", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/hash", "@com_google_absl//absl/log", "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", "@com_google_absl//absl/synchronization", "@com_google_absl//absl/time", "@local_tsl//tsl/platform:env", "@local_tsl//tsl/platform:errors", "@local_tsl//tsl/platform:macros", - "@local_tsl//tsl/platform:mutex", "@local_tsl//tsl/platform:random", "@local_tsl//tsl/platform:status", "@local_tsl//tsl/platform:strcat", @@ -142,16 +145,15 @@ tsl_gpu_library( "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/log", "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", "@com_google_absl//absl/synchronization", "@com_google_absl//absl/time", "@local_tsl//tsl/framework:cancellation", "@local_tsl//tsl/lib/monitoring:gauge", "@local_tsl//tsl/platform:env", - "@local_tsl//tsl/platform:mutex", "@local_tsl//tsl/platform:random", "@local_tsl//tsl/platform:status", - "@local_tsl//tsl/platform:statusor", "@local_tsl//tsl/platform:thread_annotations", "@local_tsl//tsl/protobuf:coordination_config_proto_cc", "@local_tsl//tsl/protobuf:coordination_service_proto_cc", diff --git a/third_party/xla/xla/tsl/distributed_runtime/coordination/coordination_service.cc b/third_party/xla/xla/tsl/distributed_runtime/coordination/coordination_service.cc index 09622fe037e934..8c3a0fb1a57edf 100644 --- a/third_party/xla/xla/tsl/distributed_runtime/coordination/coordination_service.cc +++ b/third_party/xla/xla/tsl/distributed_runtime/coordination/coordination_service.cc @@ -16,6 +16,8 @@ limitations under the License. #include "xla/tsl/distributed_runtime/coordination/coordination_service.h" #include +#include +#include #include #include #include @@ -26,11 +28,13 @@ limitations under the License. 
#include "absl/container/flat_hash_map.h" #include "absl/container/flat_hash_set.h" +#include "absl/hash/hash.h" #include "absl/log/log.h" #include "absl/status/status.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_join.h" #include "absl/strings/string_view.h" +#include "absl/synchronization/mutex.h" #include "absl/synchronization/notification.h" #include "absl/time/time.h" #include "xla/tsl/distributed_runtime/call_options.h" @@ -39,11 +43,8 @@ limitations under the License. #include "xla/tsl/util/device_name_utils.h" #include "tsl/platform/env.h" #include "tsl/platform/errors.h" -#include "tsl/platform/macros.h" -#include "tsl/platform/mutex.h" #include "tsl/platform/random.h" #include "tsl/platform/status.h" -#include "tsl/platform/strcat.h" #include "tsl/platform/thread_annotations.h" #include "tsl/protobuf/coordination_config.pb.h" #include "tsl/protobuf/coordination_service.pb.h" @@ -223,7 +224,7 @@ class CoordinationServiceStandaloneImpl : public CoordinationServiceInterface { CoordinatedTaskState state_ = CoordinatedTaskState::TASKSTATE_DISCONNECTED; absl::Status status_; - mutex last_heartbeat_mu_; + absl::Mutex last_heartbeat_mu_; uint64_t last_heartbeat_us_ TF_GUARDED_BY(last_heartbeat_mu_); // This denotes the deadline after which we stop accepting heartbeats from a // disconnected task. This grace period accounts for the lag time between @@ -252,19 +253,19 @@ class CoordinationServiceStandaloneImpl : public CoordinationServiceInterface { const std::string shutdown_barrier_id_ = absl::StrCat("Shutdown::", std::to_string(service_incarnation_)); - mutex state_mu_; + absl::Mutex state_mu_; absl::flat_hash_map> cluster_state_ TF_GUARDED_BY(state_mu_); DeviceInfo cluster_devices_ TF_GUARDED_BY(state_mu_); - mutex kv_mu_; + absl::Mutex kv_mu_; // Ordered map to store config key-values std::map kv_store_ TF_GUARDED_BY(kv_mu_); absl::flat_hash_map> get_cb_ TF_GUARDED_BY(kv_mu_); - mutex check_staleness_thread_shutdown_mu_; - condition_variable check_staleness_thread_cv_; + absl::Mutex check_staleness_thread_shutdown_mu_; + absl::CondVar check_staleness_thread_cv_; bool shutting_down_ TF_GUARDED_BY(check_staleness_thread_shutdown_mu_) = false; std::unique_ptr check_staleness_thread_; @@ -287,7 +288,7 @@ void CoordinationServiceStandaloneImpl::TaskState::SetConnected( state_ = CoordinatedTaskState::TASKSTATE_CONNECTED; status_ = absl::OkStatus(); task_incarnation_ = task_incarnation; - mutex_lock l(last_heartbeat_mu_); + absl::MutexLock l(&last_heartbeat_mu_); last_heartbeat_us_ = Env::Default()->NowMicros(); } @@ -314,14 +315,14 @@ absl::Status CoordinationServiceStandaloneImpl::TaskState::RecordHeartbeat( "Incarnation ID mismatch: expecting ", task_incarnation_, " but got ", task_incarnation, ". 
This means the remote task has restarted.")); } - mutex_lock l(last_heartbeat_mu_); + absl::MutexLock l(&last_heartbeat_mu_); last_heartbeat_us_ = Env::Default()->NowMicros(); return absl::OkStatus(); } int64_t CoordinationServiceStandaloneImpl::TaskState::TimeSinceLastHeartbeatMs() { - mutex_lock l(last_heartbeat_mu_); + absl::MutexLock l(&last_heartbeat_mu_); return (Env::Default()->NowMicros() - last_heartbeat_us_) / 1000; } @@ -388,8 +389,9 @@ void CoordinationServiceStandaloneImpl::StartCheckStaleness() { absl::flat_hash_map expired_barriers; while (true) { { - mutex_lock l(check_staleness_thread_shutdown_mu_); - check_staleness_thread_cv_.wait_for(l, std::chrono::seconds(1)); + absl::MutexLock l(&check_staleness_thread_shutdown_mu_); + check_staleness_thread_cv_.WaitWithTimeout( + &check_staleness_thread_shutdown_mu_, absl::Seconds(1)); if (shutting_down_) { return; } @@ -397,7 +399,7 @@ void CoordinationServiceStandaloneImpl::StartCheckStaleness() { // Heartbeat check. absl::Status status = absl::OkStatus(); { - mutex_lock l(state_mu_); + absl::MutexLock l(&state_mu_); for (const auto& [task_name, task_state] : cluster_state_) { // Skip tasks that are not registered or in error state if (task_state->GetState() != @@ -444,7 +446,7 @@ void CoordinationServiceStandaloneImpl::StartCheckStaleness() { // Barrier timeout check. uint64_t current_time_micros = Env::Default()->NowMicros(); { - mutex_lock l(state_mu_); + absl::MutexLock l(&state_mu_); // Gather barriers which have timed out. for (const std::string& barrier_id : ongoing_barriers_) { auto* barrier = &barriers_[barrier_id]; @@ -492,7 +494,7 @@ void CoordinationServiceStandaloneImpl::StartCheckStaleness() { void CoordinationServiceStandaloneImpl::Stop(bool shut_staleness_thread) { { - mutex_lock l(kv_mu_); + absl::MutexLock l(&kv_mu_); for (const auto& [key, get_kv_callbacks] : get_cb_) { for (const auto& get_kv_callback : get_kv_callbacks) { get_kv_callback(errors::Cancelled( @@ -504,7 +506,7 @@ void CoordinationServiceStandaloneImpl::Stop(bool shut_staleness_thread) { get_cb_.clear(); } { - mutex_lock l(state_mu_); + absl::MutexLock l(&state_mu_); for (auto& [barrier_id, barrier] : barriers_) { if (!barrier.passed) { absl::Status error = MakeCoordinationError(errors::Aborted(absl::StrCat( @@ -519,9 +521,9 @@ void CoordinationServiceStandaloneImpl::Stop(bool shut_staleness_thread) { cluster_state_.clear(); } { - mutex_lock l(check_staleness_thread_shutdown_mu_); + absl::MutexLock l(&check_staleness_thread_shutdown_mu_); shutting_down_ = true; - check_staleness_thread_cv_.notify_all(); + check_staleness_thread_cv_.SignalAll(); } if (shut_staleness_thread) { check_staleness_thread_.reset(); @@ -559,7 +561,7 @@ absl::Status CoordinationServiceStandaloneImpl::RegisterTask( absl::Status error; std::string error_message; { - mutex_lock l(state_mu_); + absl::MutexLock l(&state_mu_); if (!cluster_state_.contains(task_name)) { // Note: return early here as unexpected task register errors should not // be propagated to other tasks. @@ -629,7 +631,7 @@ void CoordinationServiceStandaloneImpl::WaitForAllTasks( const CoordinatedTask& task, const DeviceInfo& devices, StatusCallback done) { { - mutex_lock l(state_mu_); + absl::MutexLock l(&state_mu_); const auto& task_state = cluster_state_.find(GetTaskName(task)); // Collect task device info for the first time that task // has called WaitForAllTasks(). 
This will be aggregated when the barrier @@ -652,7 +654,7 @@ void CoordinationServiceStandaloneImpl::ShutdownTaskAsync( } else { absl::Status status; { - mutex_lock l(state_mu_); + absl::MutexLock l(&state_mu_); // Disconnect task from service individually. status = DisconnectTask(task); } @@ -662,7 +664,7 @@ void CoordinationServiceStandaloneImpl::ShutdownTaskAsync( absl::Status CoordinationServiceStandaloneImpl::ResetTask( const CoordinatedTask& task) { - mutex_lock l(state_mu_); + absl::MutexLock l(&state_mu_); return DisconnectTask(task); } @@ -706,7 +708,7 @@ absl::Status CoordinationServiceStandaloneImpl::ReportTaskError( const CoordinatedTask& task, absl::Status error) { const std::string task_name = GetTaskName(task); { - mutex_lock l(state_mu_); + absl::MutexLock l(&state_mu_); if (!cluster_state_.contains(task_name)) { return MakeCoordinationError( errors::InvalidArgument("Unexpected request from task ", task_name)); @@ -731,7 +733,7 @@ CoordinationServiceStandaloneImpl::GetTaskState( auto& state_info = states_info.emplace_back(); absl::Status error; { - mutex_lock l(state_mu_); + absl::MutexLock l(&state_mu_); state_info.set_state(cluster_state_[task_name]->GetState()); error = cluster_state_[task_name]->GetStatus(); } @@ -751,7 +753,7 @@ absl::Status CoordinationServiceStandaloneImpl::RecordHeartbeat( const std::string task_name = GetTaskName(task); absl::Status s = absl::OkStatus(); { - mutex_lock l(state_mu_); + absl::MutexLock l(&state_mu_); if (!cluster_state_.contains(task_name)) { return MakeCoordinationError(errors::InvalidArgument( "Unexpected heartbeat request from task: ", task_name, @@ -779,7 +781,7 @@ absl::Status CoordinationServiceStandaloneImpl::RecordHeartbeat( // Set and propagate any heartbeat errors. if (!s.ok()) { { - mutex_lock l(state_mu_); + absl::MutexLock l(&state_mu_); SetTaskError(task_name, s); } PropagateError(task); @@ -827,7 +829,7 @@ void CoordinationServiceStandaloneImpl::PropagateError( if (isRecoverableJob(source_task.job_name())) return; absl::Status error; { - mutex_lock l(state_mu_); + absl::MutexLock l(&state_mu_); error = cluster_state_[GetTaskName(source_task)]->GetStatus(); } assert(!error.ok()); @@ -843,7 +845,7 @@ void CoordinationServiceStandaloneImpl::PropagateError( std::vector task_names; { - tf_shared_lock l(state_mu_); + absl::ReaderMutexLock l(&state_mu_); task_names.reserve(cluster_state_.size()); for (const auto& pair : cluster_state_) { task_names.emplace_back(pair.first); @@ -851,7 +853,7 @@ void CoordinationServiceStandaloneImpl::PropagateError( } for (absl::string_view task : task_names) { { - mutex_lock l(state_mu_); + absl::MutexLock l(&state_mu_); // Propagate error only to tasks that are connected if (cluster_state_[task]->GetState() != CoordinatedTaskState::TASKSTATE_CONNECTED) @@ -890,7 +892,7 @@ void CoordinationServiceStandaloneImpl::PropagateError( // The normalized key will not have leading or trailing slashes, and all parts // in the key path are separated by exactly one slack ('/'). 
// E.g., ///a//b/c// --> a/b/c -std::string NormalizeKey(const StringPiece orig_key) { +std::string NormalizeKey(absl::string_view orig_key) { std::string norm_key = std::string(orig_key); const char* src = norm_key.c_str(); std::string::iterator dst = norm_key.begin(); @@ -918,7 +920,7 @@ absl::Status CoordinationServiceStandaloneImpl::InsertKeyValue( const std::string& key, const std::string& value) { VLOG(3) << "InsertKeyValue(): " << key << ": " << value; const std::string norm_key = NormalizeKey(key); - mutex_lock l(kv_mu_); + absl::MutexLock l(&kv_mu_); if (kv_store_.find(norm_key) != kv_store_.end()) { return MakeCoordinationError( errors::AlreadyExists("Config key ", key, " already exists.")); @@ -938,7 +940,7 @@ void CoordinationServiceStandaloneImpl::GetKeyValueAsync( const std::string& key, StatusOrValueCallback done) { VLOG(3) << "GetKeyValue(): " << key; const std::string norm_key = NormalizeKey(key); - mutex_lock l(kv_mu_); + absl::MutexLock l(&kv_mu_); const auto& iter = kv_store_.find(norm_key); if (iter != kv_store_.end()) { done(iter->second); @@ -956,7 +958,7 @@ absl::StatusOr CoordinationServiceStandaloneImpl::TryGetKeyValue( const std::string& key) { VLOG(3) << "TryGetKeyValue(): " << key; const std::string norm_key = NormalizeKey(key); - mutex_lock l(kv_mu_); + absl::MutexLock l(&kv_mu_); const auto& iter = kv_store_.find(norm_key); if (iter == kv_store_.end()) { return errors::NotFound("Config key ", key, " not found."); @@ -971,7 +973,7 @@ std::vector CoordinationServiceStandaloneImpl::GetKeyValueDir( const std::string norm_key = NormalizeKey(directory_key); const std::string dir = absl::StrCat(norm_key, "/"); - mutex_lock l(kv_mu_); + absl::MutexLock l(&kv_mu_); // Find first key in ordered map that has the directory prefix. auto begin = kv_store_.lower_bound(dir); std::map::iterator it; @@ -996,7 +998,7 @@ absl::Status CoordinationServiceStandaloneImpl::DeleteKeyValue( const std::string& key) { VLOG(3) << "DeleteKeyValue(): " << key; const std::string norm_key = NormalizeKey(key); - mutex_lock l(kv_mu_); + absl::MutexLock l(&kv_mu_); // Delete directory: find key range that match directory prefix const std::string dir = absl::StrCat(norm_key, "/"); auto begin = kv_store_.lower_bound(dir); @@ -1054,7 +1056,7 @@ void CoordinationServiceStandaloneImpl::BarrierAsync( absl::StrCat("A non-participating task (", GetTaskName(task), ") called the barrier: ", barrier_id))); { - mutex_lock l(state_mu_); + absl::MutexLock l(&state_mu_); // Check if coordination service has stopped. If so, return an error // immediately. if (ServiceHasStopped()) { @@ -1072,7 +1074,7 @@ void CoordinationServiceStandaloneImpl::BarrierAsync( done(error); return; } - mutex_lock l(state_mu_); + absl::MutexLock l(&state_mu_); // Check if coordination service has stopped. If so, return an error // immediately. 
if (ServiceHasStopped()) { @@ -1191,7 +1193,7 @@ void CoordinationServiceStandaloneImpl::BarrierAsync( absl::Status CoordinationServiceStandaloneImpl::CancelBarrier( const std::string& barrier_id, const CoordinatedTask& task) { - mutex_lock l(state_mu_); + absl::MutexLock l(&state_mu_); auto [it, inserted] = barriers_.try_emplace(barrier_id); auto* barrier = &it->second; if (inserted) { diff --git a/third_party/xla/xla/tsl/distributed_runtime/coordination/coordination_service.h b/third_party/xla/xla/tsl/distributed_runtime/coordination/coordination_service.h index 29abe4cbb6aaaa..fbf3206c5e95d3 100644 --- a/third_party/xla/xla/tsl/distributed_runtime/coordination/coordination_service.h +++ b/third_party/xla/xla/tsl/distributed_runtime/coordination/coordination_service.h @@ -16,6 +16,7 @@ limitations under the License. #ifndef XLA_TSL_DISTRIBUTED_RUNTIME_COORDINATION_COORDINATION_SERVICE_H_ #define XLA_TSL_DISTRIBUTED_RUNTIME_COORDINATION_COORDINATION_SERVICE_H_ +#include #include #include #include @@ -23,12 +24,14 @@ limitations under the License. #include #include +#include "absl/log/log.h" #include "absl/status/status.h" +#include "absl/status/statusor.h" #include "absl/strings/string_view.h" #include "absl/time/time.h" #include "xla/tsl/distributed_runtime/coordination/coordination_client.h" +#include "tsl/platform/macros.h" #include "tsl/platform/status.h" -#include "tsl/platform/statusor.h" #include "tsl/protobuf/coordination_config.pb.h" namespace tsl { @@ -51,7 +54,7 @@ class Env; // execution in a cluster of multiple tasks. // // When enabled, the service keeps track of cluster configurations and the state -// of cluster members. TF runtime and libraries can use it to orchastrate +// of cluster members. TF runtime and libraries can use it to orchestrate // cluster initialization, check the healthiness of tasks, and propagate error // messages to the cluster. // diff --git a/third_party/xla/xla/tsl/distributed_runtime/coordination/coordination_service_agent.cc b/third_party/xla/xla/tsl/distributed_runtime/coordination/coordination_service_agent.cc index 1d090c7a0ffbc7..860d4fd65ab90b 100644 --- a/third_party/xla/xla/tsl/distributed_runtime/coordination/coordination_service_agent.cc +++ b/third_party/xla/xla/tsl/distributed_runtime/coordination/coordination_service_agent.cc @@ -33,6 +33,7 @@ limitations under the License. #include "absl/status/status.h" #include "absl/strings/str_cat.h" #include "absl/strings/substitute.h" +#include "absl/synchronization/mutex.h" #include "absl/synchronization/notification.h" #include "absl/time/clock.h" #include "absl/time/time.h" @@ -42,7 +43,6 @@ limitations under the License. 
#include "tsl/framework/cancellation.h" #include "tsl/lib/monitoring/gauge.h" #include "tsl/platform/env.h" -#include "tsl/platform/mutex.h" #include "tsl/platform/random.h" #include "tsl/platform/status.h" #include "tsl/platform/thread_annotations.h" @@ -145,7 +145,7 @@ class CoordinationServiceAgentImpl : public CoordinationServiceAgent { CoordinationServiceConfig configs_; StatusCallback error_fn_; - mutable mutex state_mu_; + mutable absl::Mutex state_mu_; CoordinatedTaskState state_ TF_GUARDED_BY(state_mu_) = CoordinatedTaskState::TASKSTATE_UNINITIALIZED; absl::Status status_ TF_GUARDED_BY(state_mu_) = absl::OkStatus(); @@ -157,8 +157,8 @@ class CoordinationServiceAgentImpl : public CoordinationServiceAgent { uint64_t leader_incarnation_ = 0; DeviceInfo cluster_devices_; - mutex heartbeat_thread_shutdown_mu_; - condition_variable heartbeat_thread_cv_; + absl::Mutex heartbeat_thread_shutdown_mu_; + absl::CondVar heartbeat_thread_cv_; bool shutting_down_ TF_GUARDED_BY(heartbeat_thread_shutdown_mu_) = false; std::unique_ptr heartbeat_thread_; // Must outlive coordination client which may need to access it within @@ -187,7 +187,7 @@ absl::Status CoordinationServiceAgentImpl::Initialize( std::unique_ptr leader_client, StatusCallback error_fn) { enabled_usage_metric->GetCell()->Set(true); - mutex_lock l(state_mu_); + absl::MutexLock l(&state_mu_); if (state_ != CoordinatedTaskState::TASKSTATE_UNINITIALIZED) { return MakeCoordinationError(absl::FailedPreconditionError( "Coordination service agent has already been initialized.")); @@ -211,25 +211,25 @@ absl::Status CoordinationServiceAgentImpl::Initialize( } bool CoordinationServiceAgentImpl::IsInitialized() { - mutex_lock l(state_mu_); + absl::MutexLock l(&state_mu_); return state_ != CoordinatedTaskState::TASKSTATE_UNINITIALIZED; } bool CoordinationServiceAgentImpl::IsConnected() { - mutex_lock l(state_mu_); + absl::MutexLock l(&state_mu_); return state_ == CoordinatedTaskState::TASKSTATE_CONNECTED; } bool CoordinationServiceAgentImpl::IsError() { - mutex_lock l(state_mu_); + absl::MutexLock l(&state_mu_); return state_ == CoordinatedTaskState::TASKSTATE_ERROR; } void CoordinationServiceAgentImpl::StopHeartbeat() { { - mutex_lock l(heartbeat_thread_shutdown_mu_); + absl::MutexLock l(&heartbeat_thread_shutdown_mu_); shutting_down_ = true; - heartbeat_thread_cv_.notify_all(); + heartbeat_thread_cv_.SignalAll(); } heartbeat_thread_ = nullptr; } @@ -237,7 +237,7 @@ void CoordinationServiceAgentImpl::StopHeartbeat() { absl::Status CoordinationServiceAgentImpl::Connect() { VLOG(3) << "Agent has started trying to Connect()."; { - mutex_lock l(state_mu_); + absl::MutexLock l(&state_mu_); if (state_ != CoordinatedTaskState::TASKSTATE_DISCONNECTED) { return MakeCoordinationError(absl::FailedPreconditionError( "Coordination service agent is not in DISCONNECTED state.")); @@ -270,7 +270,7 @@ absl::Status CoordinationServiceAgentImpl::Connect() { if (s.ok()) { leader_incarnation_ = response.leader_incarnation(); { - mutex_lock l(state_mu_); + absl::MutexLock l(&state_mu_); state_ = CoordinatedTaskState::TASKSTATE_CONNECTED; } } @@ -341,7 +341,7 @@ absl::Status CoordinationServiceAgentImpl::Connect() { // inflight heartbeats sent during shutdown and can be ignored. absl::SleepFor(absl::Seconds(1)); { - mutex_lock l(heartbeat_thread_shutdown_mu_); + absl::MutexLock l(&heartbeat_thread_shutdown_mu_); if (shutting_down_) { return; @@ -355,11 +355,10 @@ absl::Status CoordinationServiceAgentImpl::Connect() { } // Send next heartbeat after an interval. 
{ - mutex_lock l(heartbeat_thread_shutdown_mu_); - // TODO(b/339231167): Fix the lint. - heartbeat_thread_cv_.wait_for( - l, std::chrono::milliseconds( // NOLINT(misc-include-cleaner) - heartbeat_interval_ms)); + absl::MutexLock l(&heartbeat_thread_shutdown_mu_); + heartbeat_thread_cv_.WaitWithTimeout( + &heartbeat_thread_shutdown_mu_, + absl::Milliseconds(heartbeat_interval_ms)); if (shutting_down_) { return; } @@ -437,7 +436,7 @@ CoordinationServiceAgentImpl::GetTaskState( absl::Status CoordinationServiceAgentImpl::ReportError( const absl::Status& error) { { - mutex_lock l(state_mu_); + absl::MutexLock l(&state_mu_); if (state_ == CoordinatedTaskState::TASKSTATE_UNINITIALIZED) { return MakeCoordinationError(absl::FailedPreconditionError( "Coordination service agent must be initialized first before " @@ -484,7 +483,7 @@ absl::Status CoordinationServiceAgentImpl::ShutdownInternal() { absl::Status status = absl::OkStatus(); bool is_connected = false; { - mutex_lock l(state_mu_); + absl::MutexLock l(&state_mu_); is_connected = state_ == CoordinatedTaskState::TASKSTATE_CONNECTED; } // Disconnect agent from service. @@ -522,7 +521,7 @@ absl::Status CoordinationServiceAgentImpl::ShutdownInternal() { // Tear down agent. StopHeartbeat(); { - mutex_lock l(state_mu_); + absl::MutexLock l(&state_mu_); if (state_ == CoordinatedTaskState::TASKSTATE_ERROR) { const std::string status_message = absl::StrCat( "Shutdown() was called while coordination agent is in error state, " @@ -546,7 +545,7 @@ absl::Status CoordinationServiceAgentImpl::ShutdownInternal() { absl::Status CoordinationServiceAgentImpl::Reset() { { - mutex_lock l(state_mu_); + absl::MutexLock l(&state_mu_); if (state_ != CoordinatedTaskState::TASKSTATE_ERROR) { return MakeCoordinationError(absl::FailedPreconditionError( "Reset() failed: coordination service agent is not in ERROR state.")); @@ -574,11 +573,11 @@ absl::Status CoordinationServiceAgentImpl::Reset() { // Reset agent state. StopHeartbeat(); { - mutex_lock l(state_mu_); + absl::MutexLock l(&state_mu_); state_ = CoordinatedTaskState::TASKSTATE_DISCONNECTED; } { - mutex_lock l(heartbeat_thread_shutdown_mu_); + absl::MutexLock l(&heartbeat_thread_shutdown_mu_); shutting_down_ = false; } @@ -766,7 +765,7 @@ absl::Status CoordinationServiceAgentImpl::StopWatchKey(std::string_view key) { void CoordinationServiceAgentImpl::SetError(const absl::Status& error) { assert(!error.ok()); - mutex_lock l(state_mu_); + absl::MutexLock l(&state_mu_); if (state_ == CoordinatedTaskState::TASKSTATE_ERROR) return; LOG(ERROR) << "Coordination agent is set to ERROR: " << error; @@ -804,7 +803,7 @@ void CoordinationServiceAgentImpl::WaitAtBarrierAsync( return; } { - mutex_lock l(state_mu_); + absl::MutexLock l(&state_mu_); auto [it, inserted] = used_barrier_ids_.insert(std::string(barrier_id)); if (!inserted) { done(absl::FailedPreconditionError(absl::StrCat( @@ -865,7 +864,7 @@ void CoordinationServiceAgentImpl::CancelBarrierAsync( // Returns an error if agent is not running. 
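// Aside: a minimal sketch of the shutdown-aware timed wait that the
// heartbeat thread above now uses, assuming only absl synchronization
// primitives (illustration only; not part of this change):

#include "absl/base/thread_annotations.h"
#include "absl/synchronization/mutex.h"
#include "absl/time/time.h"

class HeartbeatLoopSketch {
 public:
  void Run(absl::Duration interval) {
    while (true) {
      SendOneHeartbeat();  // Hypothetical stand-in for the heartbeat RPC.
      absl::MutexLock l(&mu_);
      // Sleep for `interval`, but wake early when Stop() signals; the mutex
      // is released while blocked and reacquired before returning.
      cv_.WaitWithTimeout(&mu_, interval);
      if (shutting_down_) return;
    }
  }

  void Stop() {
    absl::MutexLock l(&mu_);
    shutting_down_ = true;
    cv_.SignalAll();
  }

 private:
  void SendOneHeartbeat() {}

  absl::Mutex mu_;
  absl::CondVar cv_;
  bool shutting_down_ ABSL_GUARDED_BY(mu_) = false;
};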
absl::Status CoordinationServiceAgentImpl::ValidateRunningAgent( bool allow_disconnected) { - mutex_lock l(state_mu_); + absl::MutexLock l(&state_mu_); switch (state_) { case CoordinatedTaskState::TASKSTATE_CONNECTED: return absl::OkStatus(); diff --git a/third_party/xla/xla/tsl/distributed_runtime/coordination/coordination_service_agent.h b/third_party/xla/xla/tsl/distributed_runtime/coordination/coordination_service_agent.h index 0f854ae42a101b..418ca50b0db122 100644 --- a/third_party/xla/xla/tsl/distributed_runtime/coordination/coordination_service_agent.h +++ b/third_party/xla/xla/tsl/distributed_runtime/coordination/coordination_service_agent.h @@ -16,20 +16,19 @@ limitations under the License. #ifndef XLA_TSL_DISTRIBUTED_RUNTIME_COORDINATION_COORDINATION_SERVICE_AGENT_H_ #define XLA_TSL_DISTRIBUTED_RUNTIME_COORDINATION_COORDINATION_SERVICE_AGENT_H_ -#include #include #include #include #include #include -#include #include +#include "absl/status/status.h" +#include "absl/status/statusor.h" #include "absl/time/time.h" #include "xla/tsl/distributed_runtime/call_options.h" #include "xla/tsl/distributed_runtime/coordination/coordination_client.h" #include "tsl/platform/status.h" -#include "tsl/platform/statusor.h" #include "tsl/protobuf/coordination_service.pb.h" namespace tensorflow { From 4650838ce615ff4b2d2a74882cbeb0add745a82e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 May 2024 17:18:54 -0700 Subject: [PATCH 080/478] Update ops-related pbtxt files. PiperOrigin-RevId: 633379311 --- tensorflow/core/ops/ops.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index fb2f485604495b..7322cef85415f5 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -1,4 +1,4 @@ -go/nodeserialize +go/debugstr op { name: "Abort" attr { From 4c5ee99fb2f6c23aae35883dbc9dcf8e648d85d6 Mon Sep 17 00:00:00 2001 From: Kyle Lucke Date: Mon, 13 May 2024 17:23:26 -0700 Subject: [PATCH 081/478] Move Trace CommandBuffer creation into TraceCommandBufferFactory to remove circular dependencies. 
PiperOrigin-RevId: 633380264 --- third_party/xla/xla/service/gpu/runtime/BUILD | 1 + .../service/gpu/runtime/command_buffer_cmd.cc | 17 +++--- third_party/xla/xla/stream_executor/BUILD | 19 ++++++- .../xla/xla/stream_executor/command_buffer.h | 31 +--------- third_party/xla/xla/stream_executor/gpu/BUILD | 1 + .../gpu/gpu_command_buffer_test.cc | 6 +- ...fer.cc => trace_command_buffer_factory.cc} | 22 +++++--- .../trace_command_buffer_factory.h | 56 +++++++++++++++++++ 8 files changed, 102 insertions(+), 51 deletions(-) rename third_party/xla/xla/stream_executor/{command_buffer.cc => trace_command_buffer_factory.cc} (74%) create mode 100644 third_party/xla/xla/stream_executor/trace_command_buffer_factory.h diff --git a/third_party/xla/xla/service/gpu/runtime/BUILD b/third_party/xla/xla/service/gpu/runtime/BUILD index d0b940e88b523e..c259a044cb8308 100644 --- a/third_party/xla/xla/service/gpu/runtime/BUILD +++ b/third_party/xla/xla/service/gpu/runtime/BUILD @@ -85,6 +85,7 @@ cc_library( "//xla/service/gpu/kernels:custom_kernel", "//xla/service/gpu/runtime:thunk", "//xla/stream_executor", + "//xla/stream_executor:trace_command_buffer_factory", "//xla/stream_executor/gpu:gpu_stream_header", "//xla/stream_executor/gpu:gpu_types_header", "//xla/tsl/concurrency:ref_count", diff --git a/third_party/xla/xla/service/gpu/runtime/command_buffer_cmd.cc b/third_party/xla/xla/service/gpu/runtime/command_buffer_cmd.cc index b415fdd14e15a5..5a6263fe3ef557 100644 --- a/third_party/xla/xla/service/gpu/runtime/command_buffer_cmd.cc +++ b/third_party/xla/xla/service/gpu/runtime/command_buffer_cmd.cc @@ -66,6 +66,7 @@ limitations under the License. #include "xla/stream_executor/launch_dim.h" #include "xla/stream_executor/stream.h" #include "xla/stream_executor/stream_executor.h" +#include "xla/stream_executor/trace_command_buffer_factory.h" #include "xla/tsl/concurrency/ref_count.h" #include "xla/types.h" // IWYU pragma: keep #include "xla/util.h" @@ -397,8 +398,9 @@ absl::StatusOr TracedCommandBuffer::GetOrTraceCommandBuffer( // Create a new entry by calling a user-provided tracing function, move it // to front and return a pointer to cached command buffer. if (entries_[i].command_buffer == nullptr) { - TF_ASSIGN_OR_RETURN(entries_[i].command_buffer, - se::CommandBuffer::Trace(executor, stream, trace)); + TF_ASSIGN_OR_RETURN( + entries_[i].command_buffer, + se::TraceCommandBufferFactory::Create(executor, stream, trace)); entries_[i].recorded_allocs.assign(allocs.begin(), allocs.end()); return shift_right(i).command_buffer.get(); } @@ -407,8 +409,9 @@ absl::StatusOr TracedCommandBuffer::GetOrTraceCommandBuffer( // Create a new entry by calling a user-provided tracing function, replace the // last entry with it, move it to front and return a pointer to cached command // buffer. 
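// Aside: the move-to-front caching scheme described in the comments above,
// reduced to a self-contained sketch (illustrative only; the real
// TracedCommandBuffer keys entries on device allocations and traces command
// buffers on misses):

#include <array>
#include <cstddef>
#include <utility>

template <typename Key, typename Value, size_t kCapacity>
class MruCacheSketch {
 public:
  // Returns the cached value for `key`, creating it via `make` on a miss.
  // The matching (or newly created) entry is always rotated to the front.
  template <typename MakeFn>
  Value& GetOrCreate(const Key& key, MakeFn make) {
    for (size_t i = 0; i < used_; ++i) {
      if (entries_[i].first == key) return MoveToFront(i).second;
    }
    // Miss: take a free slot if available, otherwise evict the last
    // (least recently used) entry.
    const size_t slot = used_ < kCapacity ? used_++ : kCapacity - 1;
    entries_[slot] = {key, make()};
    return MoveToFront(slot).second;
  }

 private:
  std::pair<Key, Value>& MoveToFront(size_t i) {
    auto entry = std::move(entries_[i]);
    for (size_t j = i; j > 0; --j) entries_[j] = std::move(entries_[j - 1]);
    entries_[0] = std::move(entry);
    return entries_[0];
  }

  std::array<std::pair<Key, Value>, kCapacity> entries_;
  size_t used_ = 0;
};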
- TF_ASSIGN_OR_RETURN(entries_[capacity_ - 1].command_buffer, - se::CommandBuffer::Trace(executor, stream, trace)); + TF_ASSIGN_OR_RETURN( + entries_[capacity_ - 1].command_buffer, + se::TraceCommandBufferFactory::Create(executor, stream, trace)); entries_[capacity_ - 1].recorded_allocs.assign(allocs.begin(), allocs.end()); return shift_right(capacity_ - 1).command_buffer.get(); } @@ -1209,7 +1212,7 @@ absl::Status CustomCallCmd::RecordLegacyCustomCall( #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM TF_ASSIGN_OR_RETURN( auto nested_cmd, - se::CommandBuffer::Trace( + se::TraceCommandBufferFactory::Create( execute_params.stream->parent(), execute_params.command_buffer_trace_stream, [&](se::Stream* stream) { se::gpu::GpuStreamHandle gpu_stream = @@ -1289,7 +1292,7 @@ absl::Status CustomCallCmd::RecordXlaFfiCall( #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM TF_ASSIGN_OR_RETURN( auto nested_cmd, - se::CommandBuffer::Trace( + se::TraceCommandBufferFactory::Create( execute_params.stream->parent(), execute_params.command_buffer_trace_stream, [&](se::Stream* stream) { ExecutableRunOptions run_options; @@ -1406,7 +1409,7 @@ absl::Status CollectiveCmd::AddTracedCommandBuffer( const RecordParams& record_params, se::CommandBuffer* command_buffer, absl::FunctionRef trace) { TF_ASSIGN_OR_RETURN(std::unique_ptr nested_cmd, - se::CommandBuffer::Trace( + se::TraceCommandBufferFactory::Create( execute_params.stream->parent(), execute_params.command_buffer_trace_stream, trace)); diff --git a/third_party/xla/xla/stream_executor/BUILD b/third_party/xla/xla/stream_executor/BUILD index eb14242b323b00..b73e82b9e42607 100644 --- a/third_party/xla/xla/stream_executor/BUILD +++ b/third_party/xla/xla/stream_executor/BUILD @@ -521,20 +521,33 @@ cc_library( cc_library( name = "command_buffer", - srcs = ["command_buffer.cc"], hdrs = ["command_buffer.h"], visibility = ["//visibility:private"], deps = [ ":device_memory", + ":kernel", ":launch_dim", ":platform", + "@com_google_absl//absl/functional:any_invocable", + "@com_google_absl//absl/status", + "@com_google_absl//absl/types:span", + "@local_tsl//tsl/lib/gtl:int_type", + "@local_tsl//tsl/platform:errors", + ], +) + +cc_library( + name = "trace_command_buffer_factory", + srcs = ["trace_command_buffer_factory.cc"], + hdrs = ["trace_command_buffer_factory.h"], + visibility = [":internal"], + deps = [ + ":command_buffer", ":stream_executor_headers", ":stream_executor_interface", "@com_google_absl//absl/functional:any_invocable", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", - "@com_google_absl//absl/types:span", - "@local_tsl//tsl/lib/gtl:int_type", "@local_tsl//tsl/platform:errors", "@local_tsl//tsl/platform:statusor", ], diff --git a/third_party/xla/xla/stream_executor/command_buffer.h b/third_party/xla/xla/stream_executor/command_buffer.h index a91963d46a0880..5cb39e857f7fbb 100644 --- a/third_party/xla/xla/stream_executor/command_buffer.h +++ b/third_party/xla/xla/stream_executor/command_buffer.h @@ -19,25 +19,21 @@ limitations under the License. 
#include #include #include -#include #include #include #include "absl/functional/any_invocable.h" #include "absl/status/status.h" -#include "absl/status/statusor.h" #include "absl/types/span.h" #include "xla/stream_executor/device_memory.h" #include "xla/stream_executor/kernel.h" #include "xla/stream_executor/launch_dim.h" -#include "xla/stream_executor/platform.h" #include "tsl/lib/gtl/int_type.h" #include "tsl/platform/errors.h" namespace stream_executor { class Stream; -class StreamExecutorInterface; //===----------------------------------------------------------------------===// // CommandBuffer @@ -161,32 +157,6 @@ class CommandBuffer { // enum class Mode { kPrimary, kNested }; - //===--------------------------------------------------------------------===// - // Command buffer constructors - //===--------------------------------------------------------------------===// - - // Creates a new command buffer on the given executor by tracing `function` - // invocation. All StreamExecutor operations on a Stream argument will be - // recorded into the command buffer. Returned command buffer is finalized, and - // can't be updated. - // - // Command buffer tracing should be used only when it is impossible to use - // explicit construction APIs, e.g. when calling external libraries. By - // default we construct traced command buffers in nested mode because the - // primary use case for traced command buffers is to be inserted into primary - // command buffers constructed with explicit APIs. - static absl::StatusOr> Trace( - StreamExecutorInterface* executor, - absl::AnyInvocable function, - Mode mode = Mode::kNested); - - // Creates a new command buffer on the given executor by tracing `function` - // invocation using a user provided stream that will be passed to `function`. - static absl::StatusOr> Trace( - StreamExecutorInterface* executor, Stream* stream, - absl::AnyInvocable function, - Mode mode = Mode::kNested); - //===--------------------------------------------------------------------===// // Command buffer API //===--------------------------------------------------------------------===// @@ -379,6 +349,7 @@ class CommandBuffer { // Command buffer tracing API //--------------------------------------------------------------------------// private: + friend class TraceCommandBufferFactory; // Tracing APIs are private because they do not compose with command buffer // updates. Instead of tracing directly into the command buffer users should // create traced command buffers using factory methods and add them to primary diff --git a/third_party/xla/xla/stream_executor/gpu/BUILD b/third_party/xla/xla/stream_executor/gpu/BUILD index 48e951e0ed9c99..19b33618b223d3 100644 --- a/third_party/xla/xla/stream_executor/gpu/BUILD +++ b/third_party/xla/xla/stream_executor/gpu/BUILD @@ -694,6 +694,7 @@ xla_test( "//xla/stream_executor", "//xla/stream_executor:platform", "//xla/stream_executor:platform_manager", + "//xla/stream_executor:trace_command_buffer_factory", "//xla/stream_executor/gpu:gpu_driver_header", "@com_google_absl//absl/log:check", "@com_google_absl//absl/status", diff --git a/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer_test.cc b/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer_test.cc index bd50ac29429b0e..d1999c819f9927 100644 --- a/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer_test.cc +++ b/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer_test.cc @@ -35,6 +35,7 @@ limitations under the License. 
#include "xla/stream_executor/platform_manager.h" #include "xla/stream_executor/stream.h" #include "xla/stream_executor/stream_executor.h" +#include "xla/stream_executor/trace_command_buffer_factory.h" #include "tsl/lib/core/status_test_util.h" #include "tsl/platform/errors.h" #include "tsl/platform/status.h" @@ -199,7 +200,7 @@ TEST(CudaCommandBufferTest, TraceSingleKernel) { KernelArgsDeviceMemoryArray args({a, b, c}, 0); // Create a command buffer by tracing kernel launch operations. - auto cmd_buffer = CommandBuffer::Trace( + auto cmd_buffer = TraceCommandBufferFactory::Create( executor, [&](Stream* stream) { return executor->Launch(stream, ThreadDim(), BlockDim(4), *add, args); @@ -1320,7 +1321,8 @@ static void BM_TraceCommandBuffer(benchmark::State& state) { return absl::OkStatus(); }; - CHECK_OK(CommandBuffer::Trace(executor, launch_kernels, nested)); + CHECK_OK( + TraceCommandBufferFactory::Create(executor, launch_kernels, nested)); } } diff --git a/third_party/xla/xla/stream_executor/command_buffer.cc b/third_party/xla/xla/stream_executor/trace_command_buffer_factory.cc similarity index 74% rename from third_party/xla/xla/stream_executor/command_buffer.cc rename to third_party/xla/xla/stream_executor/trace_command_buffer_factory.cc index cd28a135bf519c..6e66b897e32c28 100644 --- a/third_party/xla/xla/stream_executor/command_buffer.cc +++ b/third_party/xla/xla/stream_executor/trace_command_buffer_factory.cc @@ -1,4 +1,4 @@ -/* Copyright 2023 The OpenXLA Authors. +/* Copyright 2024 The OpenXLA Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "xla/stream_executor/command_buffer.h" +#include "xla/stream_executor/trace_command_buffer_factory.h" #include #include @@ -21,24 +21,28 @@ limitations under the License. 
#include "absl/functional/any_invocable.h" #include "absl/status/status.h" #include "absl/status/statusor.h" -#include "xla/stream_executor/kernel.h" -#include "xla/stream_executor/stream.h" +#include "xla/stream_executor/command_buffer.h" #include "xla/stream_executor/stream_executor_interface.h" #include "tsl/platform/errors.h" #include "tsl/platform/statusor.h" namespace stream_executor { -absl::StatusOr> CommandBuffer::Trace( +absl::StatusOr> +TraceCommandBufferFactory::Create( StreamExecutorInterface* executor, - absl::AnyInvocable function, Mode mode) { + absl::AnyInvocable function, + CommandBuffer::Mode mode) { TF_ASSIGN_OR_RETURN(auto stream, executor->CreateStream()); - return Trace(executor, stream.get(), std::move(function), mode); + return TraceCommandBufferFactory::Create(executor, stream.get(), + std::move(function), mode); } -absl::StatusOr> CommandBuffer::Trace( +absl::StatusOr> +TraceCommandBufferFactory::Create( StreamExecutorInterface* executor, Stream* stream, - absl::AnyInvocable function, Mode mode) { + absl::AnyInvocable function, + CommandBuffer::Mode mode) { if (stream == nullptr) return absl::InvalidArgumentError( "Can't trace command buffer on a null stream"); diff --git a/third_party/xla/xla/stream_executor/trace_command_buffer_factory.h b/third_party/xla/xla/stream_executor/trace_command_buffer_factory.h new file mode 100644 index 00000000000000..9a88770c8d8b58 --- /dev/null +++ b/third_party/xla/xla/stream_executor/trace_command_buffer_factory.h @@ -0,0 +1,56 @@ +/* Copyright 2024 The OpenXLA Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef XLA_STREAM_EXECUTOR_TRACE_COMMAND_BUFFER_FACTORY_H_ +#define XLA_STREAM_EXECUTOR_TRACE_COMMAND_BUFFER_FACTORY_H_ + +#include + +#include "absl/functional/any_invocable.h" +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "xla/stream_executor/command_buffer.h" +#include "xla/stream_executor/stream.h" +#include "xla/stream_executor/stream_executor_interface.h" + +namespace stream_executor { + +class TraceCommandBufferFactory { + public: + // Creates a new command buffer on the given executor by tracing `function` + // invocation. All StreamExecutor operations on a Stream argument will be + // recorded into the command buffer. Returned command buffer is finalized, and + // can't be updated. + // + // Command buffer tracing should be used only when it is impossible to use + // explicit construction APIs, e.g. when calling external libraries. By + // default we construct traced command buffers in nested mode because the + // primary use case for traced command buffers is to be inserted into primary + // command buffers constructed with explicit APIs. 
+ static absl::StatusOr> Create( + StreamExecutorInterface* executor, + absl::AnyInvocable function, + CommandBuffer::Mode mode = CommandBuffer::Mode::kNested); + + // Creates a new command buffer on the given executor by tracing `function` + // invocation using a user provided stream that will be passed to `function`. + static absl::StatusOr> Create( + StreamExecutorInterface* executor, Stream* stream, + absl::AnyInvocable function, + CommandBuffer::Mode mode = CommandBuffer::Mode::kNested); +}; + +} // namespace stream_executor + +#endif // XLA_STREAM_EXECUTOR_TRACE_COMMAND_BUFFER_FACTORY_H_ From fa1ec11fc5277c5890d177cb91b8ed5878f5bbb9 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Mon, 13 May 2024 18:19:47 -0700 Subject: [PATCH 082/478] [PJRT:IFRT] Remove overload of PjRtLoadedExecutable::Create() that takes an unique_ptr argument. Cleanup only, NFC intended. PiperOrigin-RevId: 633393546 --- .../xla/xla/python/pjrt_ifrt/pjrt_compiler.cc | 13 +++++++++++-- .../xla/xla/python/pjrt_ifrt/pjrt_executable.cc | 16 ---------------- .../xla/xla/python/pjrt_ifrt/pjrt_executable.h | 6 ------ 3 files changed, 11 insertions(+), 24 deletions(-) diff --git a/third_party/xla/xla/python/pjrt_ifrt/pjrt_compiler.cc b/third_party/xla/xla/python/pjrt_ifrt/pjrt_compiler.cc index 7245857e70cdb5..028e480b79af2c 100644 --- a/third_party/xla/xla/python/pjrt_ifrt/pjrt_compiler.cc +++ b/third_party/xla/xla/python/pjrt_ifrt/pjrt_compiler.cc @@ -20,11 +20,18 @@ limitations under the License. #include #include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "absl/strings/string_view.h" #include "llvm/Support/Casting.h" +#include "xla/pjrt/pjrt_client.h" +#include "xla/python/ifrt/compiler.h" +#include "xla/python/ifrt/executable.h" #include "xla/python/ifrt/hlo/hlo_program.h" +#include "xla/python/ifrt/program.h" #include "xla/python/pjrt_ifrt/pjrt_client.h" #include "xla/python/pjrt_ifrt/pjrt_executable.h" #include "xla/python/pjrt_ifrt/xla_compiler.h" +#include "tsl/platform/logging.h" #include "tsl/platform/statusor.h" namespace xla { @@ -55,11 +62,13 @@ PjRtCompiler::DeserializeLoadedExecutable( TF_ASSIGN_OR_RETURN(auto xla_deserialize_options, GetXlaDeserializeExecutableOptions(std::move(options))); TF_ASSIGN_OR_RETURN( - auto pjrt_loaded_executble, + auto pjrt_loaded_executable, client_->pjrt_client()->DeserializeExecutable( serialized, std::move(xla_deserialize_options->compile_options))); return PjRtLoadedExecutable::Create( - client_, std::move(pjrt_loaded_executble), + client_, + std::shared_ptr( + std::move(pjrt_loaded_executable)), std::move(xla_deserialize_options->loaded_host_callbacks)); } diff --git a/third_party/xla/xla/python/pjrt_ifrt/pjrt_executable.cc b/third_party/xla/xla/python/pjrt_ifrt/pjrt_executable.cc index 7ae786400cd474..bf2930516a61bb 100644 --- a/third_party/xla/xla/python/pjrt_ifrt/pjrt_executable.cc +++ b/third_party/xla/xla/python/pjrt_ifrt/pjrt_executable.cc @@ -181,12 +181,6 @@ char PjRtCompatibleLoadedExecutable::ID = 0; char PjRtExecutable::ID = 0; char PjRtLoadedExecutable::ID = 0; -absl::StatusOr> PjRtExecutable::Create( - std::unique_ptr pjrt_executable) { - return std::unique_ptr(new PjRtExecutable( - std::shared_ptr(pjrt_executable.release()))); -} - absl::StatusOr> PjRtExecutable::Create( std::shared_ptr pjrt_executable) { return std::unique_ptr( @@ -203,16 +197,6 @@ absl::StatusOr PjRtExecutable::Serialize() const { return pjrt_executable_->SerializeExecutable(); } -absl::StatusOr> PjRtLoadedExecutable::Create( - PjRtCompatibleClient* client, - 
std::unique_ptr pjrt_loaded_executable, - std::vector> loaded_host_callbacks) { - return Create(client, - std::shared_ptr( - pjrt_loaded_executable.release()), - std::move(loaded_host_callbacks)); -} - absl::StatusOr> PjRtLoadedExecutable::Create( PjRtCompatibleClient* client, std::shared_ptr pjrt_loaded_executable, diff --git a/third_party/xla/xla/python/pjrt_ifrt/pjrt_executable.h b/third_party/xla/xla/python/pjrt_ifrt/pjrt_executable.h index 1aa8e2479126e4..fd59a236b15e20 100644 --- a/third_party/xla/xla/python/pjrt_ifrt/pjrt_executable.h +++ b/third_party/xla/xla/python/pjrt_ifrt/pjrt_executable.h @@ -81,8 +81,6 @@ class PjRtExecutable final : public llvm::RTTIExtends { public: // Creates PjRtExecutable from xla::PjRtExecutable. - static absl::StatusOr> Create( - std::unique_ptr pjrt_executable); static absl::StatusOr> Create( std::shared_ptr pjrt_executable); @@ -175,10 +173,6 @@ class PjRtLoadedExecutable final // Creates PjRtExecutable from xla::PjRtLoadedExecutable. We expect that // xla::PjRtLoadedExecutable has fixed output dtypes/shapes/shardings. // PjRtLoadedExecutable::GetHloModules() must be implemented. - static absl::StatusOr> Create( - PjRtCompatibleClient* client, - std::unique_ptr pjrt_loaded_executable, - std::vector> loaded_host_callbacks); static absl::StatusOr> Create( PjRtCompatibleClient* client, std::shared_ptr pjrt_loaded_executable, From 7a49d6de1169a87b199b0face45b46af9fcbbe75 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 May 2024 19:17:41 -0700 Subject: [PATCH 083/478] Update ops-related pbtxt files. PiperOrigin-RevId: 633404455 --- tensorflow/core/ops/ops.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 7322cef85415f5..2c64cab5e211e2 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -1,4 +1,4 @@ -go/debugstr +go/nodeserialize op { name: "Abort" attr { From 3e4ed32a8ad25709dfd561d5c34cc13de537578d Mon Sep 17 00:00:00 2001 From: David Dunleavy Date: Mon, 13 May 2024 19:39:55 -0700 Subject: [PATCH 084/478] Mark `gpu_sparse_dot_test` as `no_oss` due to missing dep PiperOrigin-RevId: 633408111 --- third_party/xla/xla/service/gpu/tests/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/third_party/xla/xla/service/gpu/tests/BUILD b/third_party/xla/xla/service/gpu/tests/BUILD index 8b11e1de46be2f..1c3dfbf7311549 100644 --- a/third_party/xla/xla/service/gpu/tests/BUILD +++ b/third_party/xla/xla/service/gpu/tests/BUILD @@ -926,6 +926,7 @@ xla_test( "gpu_a100", "gpu_h100", ], + tags = ["no_oss"], # b/340304923 deps = if_cuda_is_configured( [ ":gpu_codegen_test", From 820f077cc176bf9942d7306bf11ef54e585b3474 Mon Sep 17 00:00:00 2001 From: Siqiao Wu Date: Mon, 13 May 2024 19:53:26 -0700 Subject: [PATCH 085/478] Add some internal change. 
PiperOrigin-RevId: 633410091 --- tensorflow/core/tfrt/ifrt/BUILD | 1 + .../ifrt/ifrt_executable_registry_test.cc | 60 +++++++++++++++++++ 2 files changed, 61 insertions(+) diff --git a/tensorflow/core/tfrt/ifrt/BUILD b/tensorflow/core/tfrt/ifrt/BUILD index 9db13a6387a286..0bfb4aa0cb5752 100644 --- a/tensorflow/core/tfrt/ifrt/BUILD +++ b/tensorflow/core/tfrt/ifrt/BUILD @@ -490,6 +490,7 @@ tf_cc_test( "//tensorflow/core/framework:tensor", "//tensorflow/core/framework:types_proto_cc", "//tensorflow/core/platform:resource_loader", + "//tensorflow/core/platform:status_matchers", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", diff --git a/tensorflow/core/tfrt/ifrt/ifrt_executable_registry_test.cc b/tensorflow/core/tfrt/ifrt/ifrt_executable_registry_test.cc index 5dcdc6e80cca2c..e3ee0e43547771 100644 --- a/tensorflow/core/tfrt/ifrt/ifrt_executable_registry_test.cc +++ b/tensorflow/core/tfrt/ifrt/ifrt_executable_registry_test.cc @@ -40,6 +40,7 @@ limitations under the License. #include "tensorflow/core/common_runtime/device_mgr.h" #include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/platform/resource_loader.h" +#include "tensorflow/core/platform/status_matchers.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/tfrt/ifrt/ifrt_loaded_variable_registry.h" #include "tensorflow/core/tfrt/ifrt/ifrt_restore_tensor_registry.h" @@ -118,6 +119,44 @@ TEST(IfrtExecutableRegistry, Basic) { ASSERT_EQ(executable_ptr, raw_ptr); } +TEST(IfrtExecutableRegistry, DuplicateRegistrationFails) { + mlir::DialectRegistry registry; + mlir::registerAllDialects(registry); + mlir::RegisterAllTensorFlowDialects(registry); + + mlir::MLIRContext context(registry); + + int64_t program_id = 1234; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr executable, + CreateIfrtServingExecutable(context, program_id)); + TF_ASSERT_OK_AND_ASSIGN(auto handle, ServingExecutableRegistry::Register( + program_id, std::move(executable))); + + EXPECT_THAT( + ServingExecutableRegistry::Register(program_id, std::move(executable)), + testing::StatusIs(absl::StatusCode::kAlreadyExists)); +} + +TEST(IfrtExecutableRegistry, ReleaseOk) { + mlir::DialectRegistry registry; + mlir::registerAllDialects(registry); + mlir::RegisterAllTensorFlowDialects(registry); + + mlir::MLIRContext context(registry); + + int64_t program_id = 1234; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr executable, + CreateIfrtServingExecutable(context, program_id)); + TF_ASSERT_OK_AND_ASSIGN(auto handle, ServingExecutableRegistry::Register( + program_id, std::move(executable))); + + handle.Release(); + + EXPECT_EQ(ServingExecutableRegistry::Lookup(program_id), nullptr); +} + TEST(IfrtExecutableRegistry, FreezeOk) { mlir::DialectRegistry registry; mlir::registerAllDialects(registry); @@ -142,6 +181,27 @@ TEST(IfrtExecutableRegistry, FreezeOk) { ASSERT_EQ(executable_ptr, raw_ptr); } +TEST(IfrtExecutableRegistry, FreezeFailedProgramNotRegistered) { + mlir::DialectRegistry registry; + mlir::registerAllDialects(registry); + mlir::RegisterAllTensorFlowDialects(registry); + + mlir::MLIRContext context(registry); + + int64_t program_id = 1234; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr executable, + CreateIfrtServingExecutable(context, program_id)); + + TF_ASSERT_OK_AND_ASSIGN(auto handle, ServingExecutableRegistry::Register( + program_id, std::move(executable))); + + handle.Release(); + + EXPECT_THAT(handle.Freeze(), + testing::StatusIs(absl::StatusCode::kFailedPrecondition)); +} + 
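// Taken together, the tests above pin down the handle lifecycle; condensed
// into an illustrative pseudo-sequence (not itself part of the patch, each
// line corresponding to a separate test scenario):
//
//   auto handle = ServingExecutableRegistry::Register(id, std::move(exe));
//   ServingExecutableRegistry::Register(id, ...);  // -> kAlreadyExists
//   handle.Freeze();                               // OK while registered
//   handle.Release();  // Unregisters; Lookup(id) now returns nullptr.
//   handle.Freeze();   // After Release() -> kFailedPrecondition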
TEST(IfrtExecutableRegistry, InvalidProgramIdShallReturnNull) { int64_t program_id = 1234; From c76d67465b432d1d27fbfb737c63027302f1f7a6 Mon Sep 17 00:00:00 2001 From: Kuangyuan Chen Date: Mon, 13 May 2024 20:12:26 -0700 Subject: [PATCH 086/478] Improve test coverage of fallback_state.cc PiperOrigin-RevId: 633415502 --- tensorflow/core/tfrt/fallback/BUILD | 7 +++- .../core/tfrt/fallback/fallback_state_test.cc | 40 ++++++++++++++++++- 2 files changed, 43 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/tfrt/fallback/BUILD b/tensorflow/core/tfrt/fallback/BUILD index 454c102deec081..c77b4133def642 100644 --- a/tensorflow/core/tfrt/fallback/BUILD +++ b/tensorflow/core/tfrt/fallback/BUILD @@ -57,10 +57,13 @@ tf_cc_test( srcs = ["fallback_state_test.cc"], deps = [ ":fallback_state", - "//tensorflow/core:framework", + "//tensorflow/cc:cc_ops", + "//tensorflow/cc:const_op", + "//tensorflow/cc:ops", + "//tensorflow/cc:scope", + "//tensorflow/core:all_kernels", "//tensorflow/core:test", "//tensorflow/core:test_main", - "//tensorflow/core/framework:function_proto_cc", "//tensorflow/core/platform:status_matchers", "//tensorflow/core/protobuf:error_codes_proto_impl_cc", ], diff --git a/tensorflow/core/tfrt/fallback/fallback_state_test.cc b/tensorflow/core/tfrt/fallback/fallback_state_test.cc index 352a5119e69460..d7d55311e7ffd4 100644 --- a/tensorflow/core/tfrt/fallback/fallback_state_test.cc +++ b/tensorflow/core/tfrt/fallback/fallback_state_test.cc @@ -14,11 +14,15 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/core/tfrt/fallback/fallback_state.h" -#include "tensorflow/core/framework/function.h" -#include "tensorflow/core/framework/function.pb.h" +#include + +#include "tensorflow/cc/framework/ops.h" +#include "tensorflow/cc/framework/scope.h" +#include "tensorflow/cc/ops/const_op.h" #include "tensorflow/core/platform/status_matchers.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/protobuf/error_codes.pb.h" +#include "tsl/lib/core/status_test_util.h" namespace tensorflow { namespace { @@ -54,5 +58,37 @@ TEST(FallbackStateTest, CreateRendezvous) { HasSubstr("rendezvous")))); } +TEST(FallbackStateTest, CreateGraphExecutionState) { + tensorflow::SessionOptions session_options; + tensorflow::FunctionDefLibrary fdef_lib; + TF_ASSERT_OK_AND_ASSIGN( + auto fallback_state, + tfrt_stub::FallbackState::CreateWithCpuDevice(session_options, fdef_lib)); + + GraphDef graphdef; + { + auto scope = tensorflow::Scope::NewRootScope().WithDevice( + "/job:localhost/replica:0/task:0/device:CPU:0"); + + Output a = ops::Const(scope.WithOpName("a"), 2.0, {1, 1}); + + TF_ASSERT_OK(scope.ToGraphDef(&graphdef)); + } + + TF_ASSERT_OK_AND_ASSIGN( + auto graph_execution_state, + fallback_state->CreateGraphExecutionState(std::move(graphdef))); +} + +TEST(FallbackStateTest, CreateWithMockGpuDevice) { + tensorflow::SessionOptions session_options; + tensorflow::FunctionDefLibrary fdef_lib; + TF_ASSERT_OK_AND_ASSIGN(auto fallback_state, + tfrt_stub::FallbackState::CreateWithMockGpuDevice( + session_options, fdef_lib)); + const auto& device_manager = fallback_state->device_manager(); + EXPECT_GT(device_manager.NumDeviceType("GPU"), 0); +} + } // namespace } // namespace tensorflow From ff8a035af1e1364c29601b2bf7d5046eeeaeea23 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 13 May 2024 20:52:17 -0700 Subject: [PATCH 087/478] Integrate LLVM at llvm/llvm-project@e6b2197a89f5 Updates LLVM usage to match 
[e6b2197a89f5](https://github.com/llvm/llvm-project/commit/e6b2197a89f5) PiperOrigin-RevId: 633423574 --- third_party/llvm/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/third_party/llvm/workspace.bzl b/third_party/llvm/workspace.bzl index 243cc54703c4ec..669cff9644af1a 100644 --- a/third_party/llvm/workspace.bzl +++ b/third_party/llvm/workspace.bzl @@ -4,8 +4,8 @@ load("//third_party:repo.bzl", "tf_http_archive") def repo(name): """Imports LLVM.""" - LLVM_COMMIT = "a6d7828f4c50c1ec7b0b5f61fe59d7a768175dcc" - LLVM_SHA256 = "c09ec3020fb6b136064ff32f53ac07067b0f12ccbf016ac69965e4e38d61a9c0" + LLVM_COMMIT = "e6b2197a89f5d6d0f56a03c03b8afda561eee899" + LLVM_SHA256 = "a2398fc87e7c4ef96f17999a83240d421686718a8f095fed718b7539576c1fb7" tf_http_archive( name = name, From f29f62111ecb98a54951377bb0d7069a7d8c2314 Mon Sep 17 00:00:00 2001 From: Terry Heo Date: Mon, 13 May 2024 21:09:52 -0700 Subject: [PATCH 088/478] gpu_delegate: Update ADD / MUL broadcast support 4D + 3D works with broadcast when 4D batch is 1. PiperOrigin-RevId: 633426993 --- .../tools/versioning/gpu_compatibility.cc | 51 +++++++++++++++---- .../versioning/gpu_compatibility_test.cc | 42 +++++++++++++-- 2 files changed, 77 insertions(+), 16 deletions(-) diff --git a/tensorflow/lite/tools/versioning/gpu_compatibility.cc b/tensorflow/lite/tools/versioning/gpu_compatibility.cc index 29d5e27c4266c3..d8e1b12879a85b 100644 --- a/tensorflow/lite/tools/versioning/gpu_compatibility.cc +++ b/tensorflow/lite/tools/versioning/gpu_compatibility.cc @@ -435,6 +435,36 @@ absl::Status CheckCustomOpsGpuDelegateCompatibility(const OpSignature& op_sig) { absl::StrCat("Not supported custom op ", op_sig.custom_name)); } +absl::Status CheckAddMulBroadcastCompatibility( + const OpSignatureTensorSpec& input0, const OpSignatureTensorSpec& input1) { + if (input0.dims.size() > 1 && input1.dims.size() > 1 && + input0.dims.size() != input1.dims.size()) { + const std::vector*longer_dims, *shorter_dims; + if (input0.dims.size() >= input1.dims.size()) { + longer_dims = &input0.dims; + shorter_dims = &input1.dims; + } else { + longer_dims = &input1.dims; + shorter_dims = &input0.dims; + } + bool is_broadcastable = false; + + // Broadcasting 3D to 4D with batch 1 works. 
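+ // Illustrative shape pairs for this rule (mirroring the unit tests
+ // below; the shapes are examples only):
+ //   [1, 128, 513, 3] + [128, 513, 3] -> supported (4D batch is 1)
+ //   [4, 1, 1, 2]     + [1, 1, 2]     -> rejected  (4D batch is 4)
+ //   [1, 100, 256]    + [100, 256]    -> rejected  (3D vs. 2D not covered)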
+ if (longer_dims->size() == 4 && shorter_dims->size() == 3 && + longer_dims->at(0) == 1) { + is_broadcastable = true; + } + + if (!is_broadcastable) { + return absl::UnimplementedError( + absl::StrCat("Doesn't support broadcasting - input0: [", + absl::StrJoin(input0.dims, ","), "], input1: [", + absl::StrJoin(input1.dims, ","), "]")); + } + } + return absl::OkStatus(); +} + } // namespace // Logics here used to be in TFLiteOperationParser:IsSupported() @@ -449,12 +479,9 @@ absl::Status CheckGpuDelegateCompatibility(const OpSignature& op_sig) { } const auto& input0 = op_sig.inputs.at(0); const auto& input1 = op_sig.inputs.at(1); - if (input0.dims.size() > 1 && input1.dims.size() > 1 && - input0.dims.size() != input1.dims.size()) { - return absl::UnimplementedError( - absl::StrCat("ADD doesn't support broadcasting - input0: [", - absl::StrJoin(input0.dims, ","), "], input1: [", - absl::StrJoin(input1.dims, ","), "]")); + auto broadcastable = CheckAddMulBroadcastCompatibility(input0, input1); + if (!broadcastable.ok()) { + return broadcastable; } const TfLiteAddParams* tf_options; return RetrieveBuiltinData(op_sig, &tf_options); @@ -700,11 +727,13 @@ absl::Status CheckGpuDelegateCompatibility(const OpSignature& op_sig) { "MUL requires one tensor that not less than second in all " "dimensions."); } - } else if (input0.dims.size() > 1 && input1.dims.size() > 1) { - return absl::UnimplementedError( - absl::StrCat("MUL doesn't support broadcasting - input0: [", - absl::StrJoin(input0.dims, ","), "], input1: [", - absl::StrJoin(input1.dims, ","), "]")); + } else { + const auto& input0 = op_sig.inputs.at(0); + const auto& input1 = op_sig.inputs.at(1); + auto broadcastable = CheckAddMulBroadcastCompatibility(input0, input1); + if (!broadcastable.ok()) { + return broadcastable; + } } const TfLiteMulParams* tf_options; RETURN_IF_ERROR(RetrieveBuiltinData(op_sig, &tf_options)); diff --git a/tensorflow/lite/tools/versioning/gpu_compatibility_test.cc b/tensorflow/lite/tools/versioning/gpu_compatibility_test.cc index f3ca69dfc7e98d..8493137c1e0f90 100644 --- a/tensorflow/lite/tools/versioning/gpu_compatibility_test.cc +++ b/tensorflow/lite/tools/versioning/gpu_compatibility_test.cc @@ -94,7 +94,7 @@ TEST(CheckGpuDelegateCompatibility, FCConstInput) { "FullyConnected doesn't support constant input."); } -TEST(CheckGpuDelegateCompatibility, Add1DBroadCastSuccess) { +TEST(CheckGpuDelegateCompatibility, Add1Dto3DBroadcastSuccess) { OpSignature op_sig = OpSignature(); op_sig.op = BuiltinOperator_ADD; auto params = std::make_unique(); @@ -108,20 +108,52 @@ TEST(CheckGpuDelegateCompatibility, Add1DBroadCastSuccess) { EXPECT_TRUE(CheckGpuDelegateCompatibility(op_sig).message().empty()); } -TEST(CheckGpuDelegateCompatibility, Add2DBroadCastFail) { +TEST(CheckGpuDelegateCompatibility, Add2Dto3DBroadcastFail) { OpSignature op_sig = OpSignature(); op_sig.op = BuiltinOperator_ADD; auto params = std::make_unique(); op_sig.builtin_data = static_cast(params.get()); op_sig.inputs = std::vector(2); op_sig.inputs[0] = OpSignatureTensorSpec(); - op_sig.inputs[0].dims = {4, 1, 2}; + op_sig.inputs[0].dims = {1, 100, 256}; + op_sig.inputs[1] = OpSignatureTensorSpec(); + op_sig.inputs[1].dims = {100, 256}; + + EXPECT_EQ(CheckGpuDelegateCompatibility(op_sig).message(), + "Doesn't support broadcasting - input0: [1,100,256], input1: " + "[100,256]"); +} + +TEST(CheckGpuDelegateCompatibility, Add3Dto4DBroadcastFail) { + OpSignature op_sig = OpSignature(); + op_sig.op = BuiltinOperator_ADD; + auto params = std::make_unique(); + 
op_sig.builtin_data = static_cast<void*>(params.get()); op_sig.inputs = std::vector<OpSignatureTensorSpec>(2); op_sig.inputs[0] = OpSignatureTensorSpec(); op_sig.inputs[0].dims = {4, 1, 1, 2}; op_sig.inputs[1] = OpSignatureTensorSpec(); // Can't broadcast using batch of 4 op_sig.inputs[1].dims = {1, 1, 2}; EXPECT_EQ( CheckGpuDelegateCompatibility(op_sig).message(), "Doesn't support broadcasting - input0: [4,1,1,2], input1: [1,1,2]"); } TEST(CheckGpuDelegateCompatibility, Add3Dto4DBroadcastSuccess) { OpSignature op_sig = OpSignature(); op_sig.op = BuiltinOperator_ADD; auto params = std::make_unique<TfLiteAddParams>(); op_sig.builtin_data = static_cast<void*>(params.get()); op_sig.inputs = std::vector<OpSignatureTensorSpec>(2); op_sig.inputs[0] = OpSignatureTensorSpec(); op_sig.inputs[0].dims = {1, 128, 513, 3}; op_sig.inputs[1] = OpSignatureTensorSpec(); // Can be broadcasted to {1, 128, 513, 3} op_sig.inputs[1].dims = {128, 513, 3}; EXPECT_TRUE(CheckGpuDelegateCompatibility(op_sig).message().empty()); } } // namespace } // namespace tflite From 53f8d3cd09d6716f51b8bef82b7b4d281c3b4850 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 May 2024 21:19:27 -0700 Subject: [PATCH 089/478] Update ops-related pbtxt files. PiperOrigin-RevId: 633428831 --- tensorflow/core/ops/ops.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 2c64cab5e211e2..53c362ed0ab819 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -1,4 +1,4 @@ -go/nodeserialize +go/debugstr op { name: "Abort" attr { From b52925b10aa2bcd7503e6ed3af7a40335a751091 Mon Sep 17 00:00:00 2001 From: Thai Nguyen Date: Mon, 13 May 2024 21:54:14 -0700 Subject: [PATCH 090/478] Support reusing calibration data if it exists Calibration is the most time-consuming step in quantization. This CL helps avoid it when the model has already been calibrated before and no configuration change affects the saved calibration results.
PiperOrigin-RevId: 633435225 --- .../stablehlo/cc/calibration/BUILD | 2 + .../stablehlo/cc/calibration/component.cc | 99 ++++++++++------ .../stablehlo/cc/calibration/component.h | 12 +- .../stablehlo/cc/calibration/statistics.cc | 14 ++- .../stablehlo/cc/calibration/statistics.h | 13 +++ .../insert_calibration_statistics_saver.cc | 44 ++++--- .../quantization/stablehlo/passes/passes.h | 5 +- .../quantization/stablehlo/passes/passes.td | 7 ++ .../mlir/quantization/stablehlo/python/BUILD | 1 + .../integration_test/quantize_model_test.py | 108 +++++++++++++++++ .../quantize_model_test_base.py | 23 ++++ .../stablehlo/quantization_config.proto | 9 +- ...ration_statistics_saver_with_skipping.mlir | 47 ++++++++ .../integration_test/quantize_model_test.py | 110 ++++++++++++++++++ .../tensorflow/python/quantize_model.cc | 28 +++-- .../tensorflow/python/quantize_model.py | 2 +- 16 files changed, 453 insertions(+), 71 deletions(-) create mode 100644 tensorflow/compiler/mlir/quantization/stablehlo/tests/passes/insert_calibration_statistics_saver_with_skipping.mlir diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/cc/calibration/BUILD b/tensorflow/compiler/mlir/quantization/stablehlo/cc/calibration/BUILD index 7b7653c9db12c8..9926546f8c47a8 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/cc/calibration/BUILD +++ b/tensorflow/compiler/mlir/quantization/stablehlo/cc/calibration/BUILD @@ -61,9 +61,11 @@ cc_library( "//tensorflow/compiler/mlir/quantization/stablehlo/cc:types", "//tensorflow/compiler/mlir/quantization/tensorflow:exported_model_proto_cc", "//tensorflow/compiler/mlir/quantization/tensorflow:quantization_options_proto_cc", + "//tensorflow/compiler/mlir/quantization/tensorflow/calibrator:calibration_statistics_proto_cc", "//tensorflow/compiler/mlir/quantization/tensorflow/cc:run_passes", "//tensorflow/compiler/mlir/quantization/tensorflow/python:py_function_lib", "//tensorflow/core/protobuf:for_core_protos_cc", + "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/base:nullability", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/log", diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/cc/calibration/component.cc b/tensorflow/compiler/mlir/quantization/stablehlo/cc/calibration/component.cc index 2ec9b38865edfb..52db906e512391 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/cc/calibration/component.cc +++ b/tensorflow/compiler/mlir/quantization/stablehlo/cc/calibration/component.cc @@ -20,6 +20,7 @@ limitations under the License. #include #include +#include "absl/algorithm/container.h" #include "absl/base/nullability.h" #include "absl/container/flat_hash_map.h" #include "absl/log/die_if_null.h" @@ -42,6 +43,7 @@ limitations under the License. #include "tensorflow/compiler/mlir/quantization/stablehlo/cc/types.h" #include "tensorflow/compiler/mlir/quantization/stablehlo/passes/passes.h" #include "tensorflow/compiler/mlir/quantization/stablehlo/quantization_config.pb.h" +#include "tensorflow/compiler/mlir/quantization/tensorflow/calibrator/calibration_statistics.pb.h" #include "tensorflow/compiler/mlir/quantization/tensorflow/cc/run_passes.h" #include "tensorflow/compiler/mlir/quantization/tensorflow/exported_model.pb.h" #include "tensorflow/compiler/mlir/quantization/tensorflow/python/py_function_lib.h" @@ -51,32 +53,52 @@ limitations under the License. 
#include "tsl/platform/statusor.h" namespace mlir::quant::stablehlo { +namespace { using ::stablehlo::quantization::AddCalibrationStatistics; using ::stablehlo::quantization::CreateRepresentativeDatasetFileMap; using ::stablehlo::quantization::DisableDebugging; +using ::stablehlo::quantization::IsCalibrationRequired; using ::stablehlo::quantization::QuantizationConfig; +using ::stablehlo::quantization::ReadStatistics; using ::stablehlo::quantization::RepresentativeDatasetConfig; using ::stablehlo::quantization::io::CreateTmpDir; using ::stablehlo::quantization::io::GetLocalTmpFileName; +using ::stablehlo::quantization::io::ListDirectory; using ::tensorflow::AssetFileDef; using ::tensorflow::SignatureDef; +using ::tensorflow::calibrator::CalibrationStatistics; using ::tensorflow::quantization::ExportedModel; using ::tensorflow::quantization::PyFunctionLibrary; using ::tensorflow::quantization::RunPasses; +using CalibrationStatisticsFlatMap = + absl::flat_hash_map; -absl::Status RunCalibrationPasses(mlir::ModuleOp module_op, MLIRContext& ctx, - absl::string_view calibration_data_dir) { +} // namespace + +absl::Status RunCalibrationPasses( + mlir::ModuleOp module_op, MLIRContext& ctx, + absl::string_view calibration_data_dir, + const bool force_regenerate_calibration_data) { // Disable DumpTensor ops when running calibration. DisableDebugging(module_op); + std::vector skipping_aggregator_ops; + if (!force_regenerate_calibration_data) { + TF_ASSIGN_OR_RETURN(const CalibrationStatisticsFlatMap statistics_map, + ReadStatistics(calibration_data_dir)); + absl::c_for_each(statistics_map, [&](const auto& iter) { + return skipping_aggregator_ops.push_back(iter.first); + }); + } + return RunPasses( /*name=*/ CalibrationComponent::kName, /*add_passes_func=*/ - [calibration_data_dir](PassManager& pm) { - pm.addPass( - CreateInsertCalibrationStatisticsSaverPass(calibration_data_dir)); + [calibration_data_dir, &skipping_aggregator_ops](PassManager& pm) { + pm.addPass(CreateInsertCalibrationStatisticsSaverPass( + calibration_data_dir, skipping_aggregator_ops)); }, ctx, module_op); } @@ -97,8 +119,9 @@ CalibrationComponent::CalibrationComponent( signature_def_map_(std::move(signature_def_map)), signature_keys_(std::move(signature_keys)) {} -absl::StatusOr CalibrationComponent::ExportToSavedModel( +absl::Status CalibrationComponent::ExportToSavedModel( ModuleOp module_op, absl::string_view calibration_data_dir, + const bool force_regenerate_calibration_data, const absl::string_view dst_saved_model_path) { TF_ASSIGN_OR_RETURN(const std::string checkpoint_dir, GetLocalTmpFileName()); @@ -106,8 +129,13 @@ absl::StatusOr CalibrationComponent::ExportToSavedModel( // be reflected in the original values. mlir::OwningOpRef cloned_module_ref(module_op.clone()); - TF_RETURN_IF_ERROR( - RunCalibrationPasses(*cloned_module_ref, *ctx_, calibration_data_dir)); + TF_RETURN_IF_ERROR(RunCalibrationPasses(*cloned_module_ref, *ctx_, + calibration_data_dir, + force_regenerate_calibration_data)); + + const bool is_calibration_required = + IsCalibrationRequired(*cloned_module_ref); + if (!is_calibration_required) return absl::OkStatus(); // `duplicate_shape_determining_constants = false` because the // resulting graph of this step is not expected to be loaded on TPU. 
@@ -128,13 +156,13 @@ absl::StatusOr CalibrationComponent::ExportToSavedModel( src_saved_model_path_, tags_, signature_def_map_); - return exported_model; + return absl::OkStatus(); } absl::StatusOr CalibrationComponent::Run( ModuleOp module_op, const QuantizationConfig& config) { - // Exports the pre-calibrated model to SavedModel. - TF_ASSIGN_OR_RETURN(const std::string precalibrated_saved_model_dir, + // Export the calibration model to SavedModel. + TF_ASSIGN_OR_RETURN(const std::string calibration_saved_model_dir, CreateTmpDir()); std::string calibration_data_dir = @@ -143,29 +171,32 @@ absl::StatusOr CalibrationComponent::Run( TF_ASSIGN_OR_RETURN(calibration_data_dir, CreateTmpDir()); } - TF_ASSIGN_OR_RETURN(ExportedModel exported_model, - ExportToSavedModel(module_op, calibration_data_dir, - precalibrated_saved_model_dir)); - - // Translates `RepresentativeDatasetConfig`s to signature key -> - // `RepresentativeDatasetFile` mapping. - const auto dataset_configs = - config.calibration_options().representative_datasets(); - const std::vector dataset_config_vector( - dataset_configs.begin(), dataset_configs.end()); - TF_ASSIGN_OR_RETURN( - const auto representative_dataset_file_map, - CreateRepresentativeDatasetFileMap(dataset_config_vector)); - - // Runs calibration on the exported model. The statistics will be stored in a - // separate singleton object `CalibratorSingleton` and are directly added to - // `exported_model` without re-importing it. - if (py_function_lib_->RunCalibration( - precalibrated_saved_model_dir, signature_keys_, tags_, - /*force_graph_mode_calibration=*/true, - representative_dataset_file_map) == std::nullopt) { - return absl::InternalError( - "CalibrationComponent error: Failed to run calibration."); + TF_RETURN_IF_ERROR(ExportToSavedModel( + module_op, calibration_data_dir, + config.calibration_options().force_regenerate_calibration_data(), + calibration_saved_model_dir)); + + TF_ASSIGN_OR_RETURN(std::vector calibration_saved_model_files, + ListDirectory(calibration_saved_model_dir)); + if (!calibration_saved_model_files.empty()) { + // Translate `RepresentativeDatasetConfig`s to signature key -> + // `RepresentativeDatasetFile` mapping. + const auto dataset_configs = + config.calibration_options().representative_datasets(); + const std::vector dataset_config_vector( + dataset_configs.begin(), dataset_configs.end()); + TF_ASSIGN_OR_RETURN( + const auto representative_dataset_file_map, + CreateRepresentativeDatasetFileMap(dataset_config_vector)); + + // Run calibration on the exported model. + if (py_function_lib_->RunCalibration( + calibration_saved_model_dir, signature_keys_, tags_, + /*force_graph_mode_calibration=*/true, + representative_dataset_file_map) == std::nullopt) { + return absl::InternalError( + "CalibrationComponent error: Failed to run calibration."); + } } if (absl::Status status = AddCalibrationStatistics( diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/cc/calibration/component.h b/tensorflow/compiler/mlir/quantization/stablehlo/cc/calibration/component.h index a7f94e9f0a37bf..03d2dd933732d4 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/cc/calibration/component.h +++ b/tensorflow/compiler/mlir/quantization/stablehlo/cc/calibration/component.h @@ -77,10 +77,11 @@ class CalibrationComponent : public Component { // Exports `module_op` to SavedModel at `dst_saved_model_path`. 
This is used // to export the pre-calibrated `module_op` to SavedModel so that the // calibration process can use it to load and run the graph with the - // representative dataset. - absl::StatusOr ExportToSavedModel( - ModuleOp module_op, absl::string_view calibration_data_dir, - absl::string_view dst_saved_model_path); + // representative dataset. Returns a failure status if the export fails. + absl::Status ExportToSavedModel(ModuleOp module_op, + absl::string_view calibration_data_dir, + bool force_regenerate_calibration_data, + absl::string_view dst_saved_model_path); // Imports the SavedModel at `calibrated_saved_model_path` to `ModuleOp` after // running calibration. @@ -113,7 +114,8 @@ class CalibrationComponent : public Component { // Runs passes to prepare the calibration model. absl::Status RunCalibrationPasses(mlir::ModuleOp module_op, MLIRContext& ctx, - absl::string_view calibration_data_dir); + absl::string_view calibration_data_dir, + bool force_regenerate_calibration_data); } // namespace mlir::quant::stablehlo diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/cc/calibration/statistics.cc b/tensorflow/compiler/mlir/quantization/stablehlo/cc/calibration/statistics.cc index 141af8e06fc6db..ea96bd029b079e 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/cc/calibration/statistics.cc +++ b/tensorflow/compiler/mlir/quantization/stablehlo/cc/calibration/statistics.cc @@ -45,6 +45,8 @@ using ::tensorflow::quantization::PyFunctionLibrary; using CalibrationStatisticsFlatMap = absl::flat_hash_map; +} // namespace + // Reads the calibration statistics from the given directory. absl::StatusOr ReadStatistics( absl::string_view calibration_data_dir) { @@ -63,8 +65,6 @@ absl::StatusOr ReadStatistics( return statistics_map; } -} // namespace - absl::Status AddCalibrationStatistics( mlir::ModuleOp module_op, absl::string_view calibration_data_dir, const CalibrationOptions& calibration_options, @@ -102,4 +102,14 @@ absl::Status AddCalibrationStatistics( return status; } +bool IsCalibrationRequired(mlir::ModuleOp module_op) { + bool calibration_required = false; + module_op.walk( + [&calibration_required]( + mlir::TF::CalibrationStatisticsSaverOp statistics_saver_op) { + calibration_required = true; + }); + return calibration_required; +} + } // namespace stablehlo::quantization diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/cc/calibration/statistics.h b/tensorflow/compiler/mlir/quantization/stablehlo/cc/calibration/statistics.h index 48112b1eafc816..41f78be3578bca 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/cc/calibration/statistics.h +++ b/tensorflow/compiler/mlir/quantization/stablehlo/cc/calibration/statistics.h @@ -15,14 +15,24 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_MLIR_QUANTIZATION_STABLEHLO_CC_CALIBRATION_STATISTICS_H_ #define TENSORFLOW_COMPILER_MLIR_QUANTIZATION_STABLEHLO_CC_CALIBRATION_STATISTICS_H_ +#include + +#include "absl/container/flat_hash_map.h" #include "absl/status/status.h" +#include "absl/status/statusor.h" #include "absl/strings/string_view.h" #include "mlir/IR/BuiltinOps.h" // from @llvm-project #include "tensorflow/compiler/mlir/quantization/stablehlo/quantization_config.pb.h" +#include "tensorflow/compiler/mlir/quantization/tensorflow/calibrator/calibration_statistics.pb.h" #include "tensorflow/compiler/mlir/quantization/tensorflow/python/py_function_lib.h" namespace stablehlo::quantization { +// Reads the calibration statistics from the given directory. 
+absl::StatusOr>
+ReadStatistics(absl::string_view calibration_data_dir);
+
 // Adds calibrated min / max values to CustomAggregator nodes in `graph_def`.
 // The min and max values will be added to the "min" and "max" attributes,
 // respectively. `calibration_options` provides the strategy to retrieve min and
@@ -32,6 +42,9 @@ absl::Status AddCalibrationStatistics(
     const stablehlo::quantization::CalibrationOptions& calibration_options,
     const tensorflow::quantization::PyFunctionLibrary& py_function_library);
 
+// Checks if the model requires calibration.
+bool IsCalibrationRequired(mlir::ModuleOp module_op);
+
 }  // namespace stablehlo::quantization
 
 #endif  // TENSORFLOW_COMPILER_MLIR_QUANTIZATION_STABLEHLO_CC_CALIBRATION_STATISTICS_H_
diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/passes/insert_calibration_statistics_saver.cc b/tensorflow/compiler/mlir/quantization/stablehlo/passes/insert_calibration_statistics_saver.cc
index 9e6bf25fb44811..8cb0b645c312cf 100644
--- a/tensorflow/compiler/mlir/quantization/stablehlo/passes/insert_calibration_statistics_saver.cc
+++ b/tensorflow/compiler/mlir/quantization/stablehlo/passes/insert_calibration_statistics_saver.cc
@@ -15,6 +15,8 @@ limitations under the License.
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #include "absl/strings/string_view.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"  // from @llvm-project
@@ -48,11 +50,14 @@ std::string GetOutputFilePath(absl::string_view calibration_data_dir,
 }
 
 // Finds `CustomAggregator` ops and collects their outputs and attributes.
-void FindCustomAggregatorOps(Region& region,
-                             SmallVector& statistics_outputs,
-                             SmallVector& ids,
-                             SmallVector& calibration_methods) {
+void FindCustomAggregatorOps(
+    Region& region,
+    const std::unordered_set& aggregator_ops_to_ignore,
+    SmallVector& statistics_outputs, SmallVector& ids,
+    SmallVector& calibration_methods) {
   for (auto op : region.getOps()) {
+    if (aggregator_ops_to_ignore.count(op.getId().str())) continue;
+
     ids.push_back(op.getId());
     calibration_methods.push_back(op.getCalibrationMethod());
     statistics_outputs.push_back(op.getMin());
@@ -63,11 +68,13 @@ void FindCustomAggregatorOps(Region& region,
 
 // Inserts a `CalibrationStatisticsSaverOp` to the end of the region.
LogicalResult InsertCalibrationStatisticsSaverOp( - Region& region, MLIRContext& ctx, absl::string_view output_file_path) { + Region& region, MLIRContext& ctx, absl::string_view output_file_path, + const std::unordered_set& aggregator_ops_to_ignore) { SmallVector statistics_outputs; SmallVector ids; SmallVector calibration_methods; - FindCustomAggregatorOps(region, statistics_outputs, ids, calibration_methods); + FindCustomAggregatorOps(region, aggregator_ops_to_ignore, statistics_outputs, + ids, calibration_methods); if (statistics_outputs.empty()) return failure(); OpBuilder builder(&ctx); @@ -115,6 +122,7 @@ bool ContainCalibrationStatisticsSaverOp(Operation* op) { } // namespace +#define GEN_PASS_DECL_INSERTCALIBRATIONSTATISTICSSAVERPASS #define GEN_PASS_DEF_INSERTCALIBRATIONSTATISTICSSAVERPASS #include "tensorflow/compiler/mlir/quantization/stablehlo/passes/passes.h.inc" @@ -126,11 +134,7 @@ class InsertCalibrationStatisticsSaverPass InsertCalibrationStatisticsSaverPass>:: InsertCalibrationStatisticsSaverPassBase; - explicit InsertCalibrationStatisticsSaverPass(StringRef calibration_data_dir) - : calibration_data_dir_(calibration_data_dir) {} - private: - std::string calibration_data_dir_; void runOnOperation() override; }; @@ -138,17 +142,22 @@ void InsertCalibrationStatisticsSaverPass::runOnOperation() { ModuleOp module_op = getOperation(); MLIRContext& ctx = getContext(); + std::unordered_set aggregator_ops_to_ignore( + aggregator_ops_to_ignore_.begin(), aggregator_ops_to_ignore_.end()); + // Insert CalibrationStatisticsSaverOp to the end of each region. for (auto func_op : module_op.getOps()) { int32_t output_file_idx = 0; StringRef func_name = func_op.getSymName(); - func_op.walk([&output_file_idx, &ctx, &func_name, this](Operation* op) { + func_op.walk([&output_file_idx, &ctx, &func_name, &aggregator_ops_to_ignore, + this](Operation* op) { for (Region& region : op->getRegions()) { if (succeeded(InsertCalibrationStatisticsSaverOp( region, ctx, GetOutputFilePath(calibration_data_dir_, func_name, - output_file_idx)))) { + output_file_idx), + aggregator_ops_to_ignore))) { ++output_file_idx; }; } @@ -167,9 +176,14 @@ void InsertCalibrationStatisticsSaverPass::runOnOperation() { } std::unique_ptr> -CreateInsertCalibrationStatisticsSaverPass(StringRef calibration_data_dir) { - return std::make_unique( - calibration_data_dir); +CreateInsertCalibrationStatisticsSaverPass( + StringRef calibration_data_dir, + const std::vector& aggregator_ops_to_ignore) { + InsertCalibrationStatisticsSaverPassOptions options = { + .aggregator_ops_to_ignore_ = aggregator_ops_to_ignore, + .calibration_data_dir_ = calibration_data_dir.str(), + }; + return std::make_unique(options); } } // namespace mlir::quant::stablehlo diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/passes/passes.h b/tensorflow/compiler/mlir/quantization/stablehlo/passes/passes.h index a6836eaf1009cd..d13c589c2ba890 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/passes/passes.h +++ b/tensorflow/compiler/mlir/quantization/stablehlo/passes/passes.h @@ -18,6 +18,7 @@ limitations under the License. #include #include +#include #include "absl/status/statusor.h" #include "mlir/Dialect/Func/IR/FuncOps.h" // from @llvm-project @@ -46,7 +47,9 @@ CreateLiftQuantizableSpotsAsFunctionsPass( // Creates a pass that inserts CalibrationStatisticsSaverOp. 
std::unique_ptr> -CreateInsertCalibrationStatisticsSaverPass(StringRef calibration_data_dir); +CreateInsertCalibrationStatisticsSaverPass( + StringRef calibration_data_dir, + const std::vector& aggregator_ops_to_ignore); // Adds generated pass default constructors or options definitions. #define GEN_PASS_DECL diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/passes/passes.td b/tensorflow/compiler/mlir/quantization/stablehlo/passes/passes.td index b61550e5347e36..7661e8d562fbe9 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/passes/passes.td +++ b/tensorflow/compiler/mlir/quantization/stablehlo/passes/passes.td @@ -228,5 +228,12 @@ def InsertCalibrationStatisticsSaverPass : Pass<"stablehlo-insert-calibration-st `CalibrationStatisticsSaver` op at the end of the function to collect their statistics. }]; + let options = [ + ListOption<"aggregator_ops_to_ignore_", "aggregator-ops-to-ignore", "std::string", + "Ops to ignore when inserting CalibrationStatisticsSaver.">, + Option<"calibration_data_dir_", "calibration-data-dir", + "std::string", /*default=*/"", + "The directory to save calibration data.">, + ]; let dependentDialects = ["TF::TensorFlowDialect"]; } diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/python/BUILD b/tensorflow/compiler/mlir/quantization/stablehlo/python/BUILD index df5252b986adf5..a4bdf7da09f8ee 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/python/BUILD +++ b/tensorflow/compiler/mlir/quantization/stablehlo/python/BUILD @@ -62,6 +62,7 @@ pytype_strict_library( # "//tensorflow/python/ops:nn_ops", # "//tensorflow/python/ops:variables", # "//tensorflow/python/platform:client_testlib", +# "//tensorflow/python/platform:tf_logging", # "//tensorflow/python/saved_model:load", # "//tensorflow/python/saved_model:loader", # "//tensorflow/python/saved_model:save", diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/python/integration_test/quantize_model_test.py b/tensorflow/compiler/mlir/quantization/stablehlo/python/integration_test/quantize_model_test.py index 5e9f9955f329f3..ab0fb1d5662bba 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/python/integration_test/quantize_model_test.py +++ b/tensorflow/compiler/mlir/quantization/stablehlo/python/integration_test/quantize_model_test.py @@ -577,6 +577,114 @@ def data_gen() -> repr_dataset.RepresentativeDataset: 0.65, ) + def test_reuse_calibration_data(self): + _, y_shape, bias_shape, x_signature, y_signature = ( + self._prepare_sample_einsum_datashapes('abc,cde->abde', use_bias=True) + ) + + self._create_einsum_model( + self._input_saved_model_path, + 'abc,cde->abde', + y_shape, + x_signature, + y_signature, + bias_shape, + ) + + # Generate model input data. 
+    rng = np.random.default_rng(seed=42)
+    input_data = ops.convert_to_tensor(
+        rng.uniform(low=0.0, high=1.0, size=x_signature).astype('f4')
+    )
+
+    def data_gen() -> repr_dataset.RepresentativeDataset:
+      for _ in range(100):
+        yield {
+            'x': ops.convert_to_tensor(
+                np.random.uniform(low=0.0, high=1.0, size=x_signature).astype(
+                    'f4'
+                )
+            ),
+        }
+
+    dataset_path = self.create_tempfile('tfrecord').full_path
+    path_map = {'serving_default': dataset_path}
+    repr_dataset.TfRecordRepresentativeDatasetSaver(path_map).save(
+        {'serving_default': data_gen()}
+    )
+
+    calibration_data_dir = self.create_tempdir('calibration_data').full_path
+    config = qc.QuantizationConfig(
+        static_range_ptq_preset=qc.StaticRangePtqPreset(
+            representative_datasets=[
+                qc.RepresentativeDatasetConfig(
+                    tf_record=qc.TfRecordFile(path=dataset_path)
+                )
+            ]
+        ),
+        tf_saved_model=qc.TfSavedModelConfig(tags=[tag_constants.SERVING]),
+        calibration_options=qc.CalibrationOptions(
+            calibration_method=_CalibrationMethod.CALIBRATION_METHOD_MIN_MAX,
+            calibration_data_dir=calibration_data_dir,
+        ),
+    )
+
+    # Run quantization the first time; calibration is expected to be run.
+    with self.assertLogs(level='INFO') as info_logs:
+      quantization.quantize_saved_model(
+          self._input_saved_model_path,
+          self._output_saved_model_path,
+          config,
+      )
+      self.assertTrue(
+          self._any_log_contains(
+              'Calibration step is executed in graph mode.',
+              info_logs.records,
+          )
+      )
+    module_str = self._extract_first_xla_call_module_op(
+        self._output_saved_model_path
+    )
+    self.assertTrue(
+        re.search('stablehlo.dot_general.*xi8>.*xi8>.*xi32>', module_str)
+    )
+
+    # Run quantization a second time; calibration is expected to be skipped.
+    output_saved_model_path_2 = self.create_tempdir('output2').full_path
+    with self.assertLogs(level='INFO') as info_logs:
+      quantization.quantize_saved_model(
+          self._input_saved_model_path,
+          output_saved_model_path_2,
+          config,
+      )
+      self.assertFalse(
+          self._any_log_contains(
+              'Calibration step is executed in graph mode.',
+              info_logs.records,
+          )
+      )
+    module_str = self._extract_first_xla_call_module_op(
+        output_saved_model_path_2
+    )
+    self.assertTrue(
+        re.search('stablehlo.dot_general.*xi8>.*xi8>.*xi32>', module_str)
+    )
+
+    # Expect both quantized models to produce the same results.
+    root = load.load(self._output_saved_model_path)
+    self.assertCountEqual(root.signatures.keys(), {'serving_default'})
+    new_outputs_1 = root.signatures['serving_default'](
+        x=ops.convert_to_tensor(input_data)
+    )
+
+    root = load.load(output_saved_model_path_2)
+    self.assertCountEqual(root.signatures.keys(), {'serving_default'})
+    new_outputs_2 = root.signatures['serving_default'](
+        x=ops.convert_to_tensor(input_data)
+    )
+
+    self.assertAllClose(new_outputs_1, new_outputs_2)
+
   @parameterized.named_parameters(
       ('use_constant_with_int32_input', np.int32, False),
       ('use_variable_with_int32_input', np.int32, True),
diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/python/integration_test/quantize_model_test_base.py b/tensorflow/compiler/mlir/quantization/stablehlo/python/integration_test/quantize_model_test_base.py
index 31c53a4cf20fe9..fef1784fec9370 100644
--- a/tensorflow/compiler/mlir/quantization/stablehlo/python/integration_test/quantize_model_test_base.py
+++ b/tensorflow/compiler/mlir/quantization/stablehlo/python/integration_test/quantize_model_test_base.py
@@ -33,11 +33,13 @@
 from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
+from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.saved_model import load
 from tensorflow.python.saved_model import loader_impl
 from tensorflow.python.saved_model import save as saved_model_save
 from tensorflow.python.types import core
 
+
 FUNC_ALIAS = 'some_alias'
 
 @@ -164,6 +166,27 @@ def matmul(self, input_tensor: core.Tensor) -> Mapping[str, core.Tensor]:
     )
     return model
 
+  def _any_log_contains(
+      self, substring: str, log_record_list: List['logging.LogRecord']
+  ) -> bool:
+    """Returns True if any of the logs contains a given substring.
+
+    Args:
+      substring: A piece of string to check whether it exists in the log
+        message.
+      log_record_list: A list of `absl.logging.LogRecord`s.
+
+    Returns:
+      True if and only if the substring exists in any of the logs in
+      `log_record_list`.
+    """
+    return any(
+        map(
+            lambda log_record: substring in str(log_record.message),
+            log_record_list,
+        )
+    )
+
   def _create_matmul_and_same_scale_model(
       self,
       input_shape: Sequence[int],
diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/quantization_config.proto b/tensorflow/compiler/mlir/quantization/stablehlo/quantization_config.proto
index f156b66997211d..a307e1d927eea3 100644
--- a/tensorflow/compiler/mlir/quantization/stablehlo/quantization_config.proto
+++ b/tensorflow/compiler/mlir/quantization/stablehlo/quantization_config.proto
@@ -278,7 +278,7 @@ message DebuggerConfig {
 }
 
 // Defines various calibration options.
-// Next ID: 5
+// Next ID: 6
 message CalibrationOptions {
   // Configurations for calibration methods.
   // Next ID: 7
@@ -332,8 +332,13 @@ message CalibrationOptions {
   // representative dataset used to calibrate a function.
   repeated RepresentativeDatasetConfig representative_datasets = 3;
 
-  // The path to save calibration statistics data.
+  // The path to save calibration statistics data. If not set, use a temporary
+  // directory.
  string calibration_data_dir = 4;
+
+  // Whether to regenerate calibration data even if it already exists in
+  // `calibration_data_dir`. Defaults to false, i.e. existing calibration data
+  // is reused.
+  bool force_regenerate_calibration_data = 5;
 }
 
 // Quantization configuration for StableHLO Quantizer.
This is the primary
diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/tests/passes/insert_calibration_statistics_saver_with_skipping.mlir b/tensorflow/compiler/mlir/quantization/stablehlo/tests/passes/insert_calibration_statistics_saver_with_skipping.mlir
new file mode 100644
index 00000000000000..97d546afe2b723
--- /dev/null
+++ b/tensorflow/compiler/mlir/quantization/stablehlo/tests/passes/insert_calibration_statistics_saver_with_skipping.mlir
@@ -0,0 +1,47 @@
+// RUN: stablehlo-quant-opt %s -split-input-file -stablehlo-insert-calibration-statistics-saver='aggregator-ops-to-ignore=skipping_id' | FileCheck %s
+
+func.func @serving_default(%arg0: tensor<1x3x4x3xf32>) -> (tensor<1x2x2x2xf32>) attributes {tf.entry_function = {control_outputs = "", inputs = "serving_default_input_tensor:0", outputs = "PartitionedCall:0"}} {
+  %cst = "tf.Const"() <{value = dense<[[[[-0.891899645, 0.392044574], [0.77720493, 1.31188095], [0.255048186, 2.700150e+00]], [[-1.08111858, -0.406604826], [-0.298575521, -2.25356531], [-1.00201964, 2.54532099]], [[-1.34911358, 0.279911458], [-0.868258893, -1.36708188], [0.866317451, -2.05804896]]], [[[-0.591397941, 0.331505477], [0.715151429, 2.64073896], [1.27163255, 0.206143498]], [[0.474211812, 1.45044816], [0.119936548, 2.54149938], [-0.939900994, 0.438387245]], [[-1.12486279, -1.09022558], [0.82202208, 1.04652023], [1.30316162, 2.62054276]]]]> : tensor<2x3x3x2xf32>}> : () -> tensor<2x3x3x2xf32>
+  %output, %min, %max, %histogram = "tf.CustomAggregator"(%arg0) <{calibration_method = 5 : i32, id = "skipping_id", num_bins = 32 : i32, max_percentile = 0.000000e+00 : f32, min_percentile = 0.000000e+00 : f32}> : (tensor<1x3x4x3xf32>) -> (tensor<1x3x4x3xf32>, tensor, tensor, tensor<512xi64>)
+  %0 = "tf.Conv2D"(%output, %cst) <{data_format = "NHWC", dilations = [1, 1, 1, 1], explicit_paddings = [], padding = "SAME", strides = [1, 2, 2, 1], use_cudnn_on_gpu = true}> {attr_map = "0:strides,1:use_cudnn_on_gpu,2:padding,3:explicit_paddings,4:dilations", device = ""} : (tensor<1x3x4x3xf32>, tensor<2x3x3x2xf32>) -> tensor<1x2x2x2xf32>
+  %output_1, %min_2, %max_3, %histogram_4 = "tf.CustomAggregator"(%0) <{calibration_method = 5 : i32, id = "keeping_id", num_bins = 32 : i32, max_percentile = 0.000000e+00 : f32, min_percentile = 0.000000e+00 : f32}> : (tensor<1x2x2x2xf32>) -> (tensor<1x2x2x2xf32>, tensor, tensor, tensor<512xi64>)
+  %1 = "tf.Identity"(%output_1) {device = ""} : (tensor<1x2x2x2xf32>) -> tensor<1x2x2x2xf32>
+  return %1 : tensor<1x2x2x2xf32>
+}
+// CHECK-LABEL: @serving_default
+// CHECK: %[[CUSTOM_AGGREGATOR_0:.*]], %[[MIN_O:.*]], %[[MAX_O:.*]], %[[HISTOGRAM_0:.*]] = "tf.CustomAggregator"
+// CHECK-SAME: <{calibration_method = 5 : i32, id = "skipping_id", num_bins = 32 : i32, max_percentile = 0.000000e+00 : f32, min_percentile = 0.000000e+00 : f32}> : (tensor<1x3x4x3xf32>) -> (tensor<1x3x4x3xf32>, tensor, tensor, tensor<512xi64>)
+// CHECK: %[[CUSTOM_AGGREGATOR_1:.*]], %[[MIN_1:.*]], %[[MAX_1:.*]], %[[HISTOGRAM_1:.*]] = "tf.CustomAggregator"
+// CHECK-SAME: <{calibration_method = 5 : i32, id = "keeping_id", num_bins = 32 : i32, max_percentile = 0.000000e+00 : f32, min_percentile = 0.000000e+00 : f32}> : (tensor<1x3x4x3xf32>) -> (tensor<1x3x4x3xf32>, tensor, tensor, tensor<512xi64>)
+// CHECK: "tf.CalibrationStatisticsSaver"(%[[MIN_1]], %[[MAX_1]], %[[HISTOGRAM_1]])
+// CHECK-SAME: <{calibration_methods = [5 : i32], ids = ["keeping_id"], output_file_path = "serving_default_0.pb"}> : (tensor, tensor, tensor<512xi64>) -> ()
+// CHECK: return
+
+// -----
+
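+// The second case below exercises the same aggregator-ops-to-ignore=skipping_id
+// flag on a tf_saved_model module, where the surviving aggregator feeds an
+// XlaCallModule-based composite function rather than a plain tf.Conv2D.
+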
+module attributes {tf.versions = {bad_consumers = [], min_consumer = 12 : i32, producer = 1836 : i32}, tf_saved_model.semantics} {
+  func.func @main(%arg0: tensor<10x1x1024xf32> {tf_saved_model.index_path = ["input_tensor"]}) -> (tensor<10x1x3xf32> {tf_saved_model.index_path = ["output"]}) attributes {tf.entry_function = {control_outputs = "", inputs = "serving_default_input_tensor:0", outputs = "PartitionedCall:0"}, tf_saved_model.exported_names = ["serving_default"]} {
+    %cst = stablehlo.constant dense<0.000000e+00>: tensor<10x1024x3xf32>
+    %output, %min, %max, %histogram = "tf.CustomAggregator"(%arg0) <{calibration_method = 1 : i32, id = "skipping_id", max_percentile = 0.000000e+00 : f32, min_percentile = 0.000000e+00 : f32, num_bins = 0 : i32}> : (tensor<10x1x1024xf32>) -> (tensor<10x1x1024xf32>, tensor, tensor, tensor<0xi64>)
+    %0 = "tf.XlaCallModule"(%output, %cst) <{Sout = [#tf_type.shape<10x1x3>], dim_args_spec = [], disabled_checks = [], function_list = [], has_token_input_output = false, module = "", platforms = ["CPU"], version = 9 : i64}> {_entry_function = @composite_dot_general_with_relu_fn_1, _original_entry_function = "composite_dot_general_with_relu_fn_1", _quantization_method = "static_range_ptq { }", _stablehlo_module_attrs = {jax.uses_shape_polymorphism = true}, _tfl_quant_trait = "fully_quantizable"} : (tensor<10x1x1024xf32>, tensor<10x1024x3xf32>) -> tensor<10x1x3xf32>
+    %output_0, %min_1, %max_2, %histogram_3 = "tf.CustomAggregator"(%0) <{calibration_method = 1 : i32, id = "keeping_id", max_percentile = 0.000000e+00 : f32, min_percentile = 0.000000e+00 : f32, num_bins = 0 : i32}> : (tensor<10x1x3xf32>) -> (tensor<10x1x3xf32>, tensor, tensor, tensor<0xi64>)
+    return %output_0 : tensor<10x1x3xf32>
+  }
+  // CHECK-LABEL: @main
+  // CHECK: %[[CUSTOM_AGGREGATOR_0:.*]], %[[MIN_O:.*]], %[[MAX_O:.*]], %[[HISTOGRAM_0:.*]] = "tf.CustomAggregator"
+  // CHECK-SAME: <{calibration_method = 1 : i32, id = "skipping_id", max_percentile = 0.000000e+00 : f32, min_percentile = 0.000000e+00 : f32, num_bins = 0 : i32}>
+  // CHECK: %[[CUSTOM_AGGREGATOR_1:.*]], %[[MIN_1:.*]], %[[MAX_1:.*]], %[[HISTOGRAM_1:.*]] = "tf.CustomAggregator"
+  // CHECK-SAME: <{calibration_method = 1 : i32, id = "keeping_id", max_percentile = 0.000000e+00 : f32, min_percentile = 0.000000e+00 : f32, num_bins = 0 : i32}>
+  // CHECK: "tf.CalibrationStatisticsSaver"(%[[MIN_1]], %[[MAX_1]], %[[HISTOGRAM_1]])
+  // CHECK-SAME: <{calibration_methods = [1 : i32], ids = ["keeping_id"], output_file_path = "main_0.pb"}> : (tensor, tensor, tensor<0xi64>) -> ()
+  // CHECK: return
+
+  func.func private @composite_dot_general_with_relu_fn_1(%arg0: tensor<10x1x1024xf32>, %arg1: tensor<10x1024x3xf32>) -> tensor<10x1x3xf32> attributes {_from_xla_call_module, tf_quant.composite_function} {
+    %cst = stablehlo.constant dense<0.000000e+00> : tensor<10x1x3xf32>
+    %0 = stablehlo.dot_general %arg0, %arg1, batching_dims = [0] x [0], contracting_dims = [2] x [1], precision = [DEFAULT, DEFAULT] {mhlo.frontend_attributes = {grad_x = "false", grad_y = "false"}} : (tensor<10x1x1024xf32>, tensor<10x1024x3xf32>) -> tensor<10x1x3xf32>
+    %1 = stablehlo.maximum %0, %cst : tensor<10x1x3xf32>
+    return %1 : tensor<10x1x3xf32>
+  }
+  // CHECK-LABEL: func.func private @composite_dot_general_with_relu_fn_1
+  // CHECK-NOT: "tf.CalibrationStatisticsSaver"
+}
\ No newline at end of file
diff --git a/tensorflow/compiler/mlir/quantization/tensorflow/python/integration_test/quantize_model_test.py
b/tensorflow/compiler/mlir/quantization/tensorflow/python/integration_test/quantize_model_test.py
index e9c18a66943ff9..53228d30bd53fc 100644
--- a/tensorflow/compiler/mlir/quantization/tensorflow/python/integration_test/quantize_model_test.py
+++ b/tensorflow/compiler/mlir/quantization/tensorflow/python/integration_test/quantize_model_test.py
@@ -2710,6 +2710,116 @@ def data_gen() -> repr_dataset.RepresentativeDataset:
     self.assertAllClose(new_outputs, got_outputs, atol=0.097)
     self.assertAllClose(new_outputs, expected_outputs, atol=0.057)
 
+  def test_reuse_calibration_data(self):
+    model = self._create_simple_gather_and_conv_model(
+        dtypes.int32, filter_shape=(2, 3, 3, 1024)
+    )
+    saved_model_save.save(model, self._input_saved_model_path)
+
+    data_gen = self._create_data_generator(
+        input_key='input_tensor',
+        shape=[50],
+        minval=0,
+        maxval=64,
+        dtype=dtypes.int32,
+    )
+
+    tags = {tag_constants.SERVING}
+
+    calibration_data_dir = self.create_tempdir('calibration_data').full_path
+    quantization_options = quant_opts_pb2.QuantizationOptions(
+        quantization_method=quant_opts_pb2.QuantizationMethod(
+            preset_method=_PresetMethod.METHOD_STATIC_RANGE_INT8
+        ),
+        tags=tags,
+        signature_keys=['serving_default'],
+        op_set=quant_opts_pb2.XLA,
+        force_graph_mode_calibration=True,
+        calibration_options=stablehlo_quant_config_pb2.CalibrationOptions(
+            calibration_method=_CalibrationMethod.CALIBRATION_METHOD_MIN_MAX,
+            calibration_data_dir=calibration_data_dir,
+        ),
+    )
+
+    # Run quantization the first time; calibration is expected to be run.
+    with self.assertLogs(level='INFO') as info_logs:
+      # Save the logger verbosity.
+      prev_log_level = logging.get_verbosity()
+      logging.set_verbosity(logging.INFO)
+      try:
+        converted_model1 = quantize_model.quantize(
+            self._input_saved_model_path,
+            self._output_saved_model_path,
+            quantization_options,
+            representative_dataset=data_gen,
+        )
+      finally:
+        # Restore the logger verbosity.
+        logging.set_verbosity(prev_log_level)
+
+      self.assertNotEmpty(info_logs.records)
+      self.assertTrue(
+          self._any_log_contains(
+              'Calibration step is executed in graph mode.',
+              info_logs.records,
+          )
+      )
+    self.assertIsNotNone(converted_model1)
+    self.assertCountEqual(
+        converted_model1.signatures._signatures.keys(), {'serving_default'}
+    )
+
+    output_loader = saved_model_loader.SavedModelLoader(
+        self._output_saved_model_path
+    )
+    output_graphdef = output_loader.get_meta_graph_def_from_tags(
+        tags
+    ).graph_def
+    self.assertTrue(self._contains_op(output_graphdef, 'XlaConvV2'))
+
+    # Run quantization a second time; calibration is expected to be skipped.
+    with self.assertLogs(level='INFO') as info_logs:
+      # Save the logger verbosity.
+      prev_log_level = logging.get_verbosity()
+      logging.set_verbosity(logging.INFO)
+      try:
+        converted_model2 = quantize_model.quantize(
+            self._input_saved_model_path,
+            self._output_saved_model_path,
+            quantization_options,
+            representative_dataset=data_gen,
+            overwrite_output_directory=True,
+        )
+      finally:
+        # Restore the logger verbosity.
+        logging.set_verbosity(prev_log_level)
+
+      self.assertNotEmpty(info_logs.records)
+      self.assertFalse(
+          self._any_log_contains(
+              'Calibration step is executed in graph mode.',
+              info_logs.records,
+          )
+      )
+    self.assertIsNotNone(converted_model2)
+    self.assertCountEqual(
+        converted_model2.signatures._signatures.keys(), {'serving_default'}
+    )
+
+    # Expect the two models to produce the same results.
+ test_data = ops.convert_to_tensor( + np.random.uniform(low=0, high=64, size=(32)).astype( + dtypes.int32.as_numpy_dtype + ) + ) + new_outputs_1 = converted_model1.signatures['serving_default']( + input_tensor=test_data + )['output'] + new_outputs_2 = converted_model2.signatures['serving_default']( + input_tensor=test_data + )['output'] + self.assertAllClose(new_outputs_1, new_outputs_2) + @test_util.run_in_graph_and_eager_modes def test_function_alias_preserved(self): model = self._create_conv2d_model( diff --git a/tensorflow/compiler/mlir/quantization/tensorflow/python/quantize_model.cc b/tensorflow/compiler/mlir/quantization/tensorflow/python/quantize_model.cc index e055d9dda95bbb..aef2732549ede2 100644 --- a/tensorflow/compiler/mlir/quantization/tensorflow/python/quantize_model.cc +++ b/tensorflow/compiler/mlir/quantization/tensorflow/python/quantize_model.cc @@ -84,6 +84,7 @@ using ::stablehlo::quantization::AddCalibrationStatistics; using ::stablehlo::quantization::ChangeToQuantizedFilename; using ::stablehlo::quantization::DebuggerConfig; using ::stablehlo::quantization::ExpandPresets; +using ::stablehlo::quantization::IsCalibrationRequired; using ::stablehlo::quantization::PopulateDefaults; using ::stablehlo::quantization::QuantizationConfig; using ::stablehlo::quantization::io::CreateTmpDir; @@ -163,7 +164,10 @@ absl::StatusOr ExportCalibrationModel( mlir::OwningOpRef cloned_module_ref(module_op.clone()); TF_RETURN_IF_ERROR( - RunCalibrationPasses(*cloned_module_ref, *context, calibration_data_dir)); + RunCalibrationPasses(*cloned_module_ref, *context, calibration_data_dir, + quantization_options.calibration_options() + .force_regenerate_calibration_data())); + if (!IsCalibrationRequired(*cloned_module_ref)) return ExportedModel(); absl::StatusOr exported_model = ModuleOpToExportedModel( *cloned_module_ref, context, kTfQuantPtqPreCalibrationStepName, @@ -457,16 +461,18 @@ absl::StatusOr QuantizeStaticRangePtq( *function_aliases, calibration_data_dir)); // Save and run the calibration model. 
-  TF_ASSIGN_OR_RETURN(std::string precalibrated_saved_model_dir,
-                      CreateTmpDir());
-  py_function_library.SaveExportedModel(
-      precalibrated_saved_model_dir, calibration_exported_model,
-      saved_model_path, tags, signature_def_map);
-
-  py_function_library.RunCalibration(
-      precalibrated_saved_model_dir, signature_keys, tags,
-      quantization_options.force_graph_mode_calibration(),
-      representative_dataset_file_map_serialized);
+  if (calibration_exported_model.has_graph_def()) {
+    TF_ASSIGN_OR_RETURN(std::string calibration_saved_model_dir,
+                        CreateTmpDir());
+    py_function_library.SaveExportedModel(
+        calibration_saved_model_dir, calibration_exported_model,
+        saved_model_path, tags, signature_def_map);
+
+    py_function_library.RunCalibration(
+        calibration_saved_model_dir, signature_keys, tags,
+        quantization_options.force_graph_mode_calibration(),
+        representative_dataset_file_map_serialized);
+  }
 
   if (absl::Status status = AddCalibrationStatistics(
           *module_ref, calibration_data_dir,
diff --git a/tensorflow/compiler/mlir/quantization/tensorflow/python/quantize_model.py b/tensorflow/compiler/mlir/quantization/tensorflow/python/quantize_model.py
index 92ee947c1c3c2c..91c82710c489a9 100644
--- a/tensorflow/compiler/mlir/quantization/tensorflow/python/quantize_model.py
+++ b/tensorflow/compiler/mlir/quantization/tensorflow/python/quantize_model.py
@@ -638,7 +638,7 @@ def _populate_calibration_options(
   if calib_opts.calibration_data_dir:
     save_model.create_empty_output_dir(
         calib_opts.calibration_data_dir,
-        overwrite=True,
+        overwrite=calib_opts.force_regenerate_calibration_data,
     )
 

From b589eadd14cf20e46726d6d10fe98f50d11d8ccf Mon Sep 17 00:00:00 2001
From: Adrian Kuegel
Date: Mon, 13 May 2024 22:48:43 -0700
Subject: [PATCH 091/478] Add AlgebraicSimplifier pass directly in front of
 HloConstantFolding.

We have cases with DynamicSlice(Broadcast) that can be simplified away and
would otherwise be constant folded. The HloEvaluator is really slow for
DynamicSlice and Slice, as it will evaluate the whole first operand, so if we
can simplify it away, it will help.

PiperOrigin-RevId: 633445573
---
 third_party/xla/xla/service/gpu/gpu_compiler.cc | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/third_party/xla/xla/service/gpu/gpu_compiler.cc b/third_party/xla/xla/service/gpu/gpu_compiler.cc
index 276a4f2c9929a7..a4086262b96a7a 100644
--- a/third_party/xla/xla/service/gpu/gpu_compiler.cc
+++ b/third_party/xla/xla/service/gpu/gpu_compiler.cc
@@ -574,7 +574,6 @@ absl::Status RunSPMDPasses(
       gpu_target_config.device_description.gpu_compute_capability();
   spmd_simplify.AddPass(
       layout_insensitive_algsimp_opts, gpu_version);
-  spmd_simplify.AddPass();
 
   spmd_simplify.AddPass();
   spmd_simplify.AddPass(
@@ -587,6 +586,15 @@ absl::Status RunSPMDPasses(
   ReshapeMoverOptions reshape_mover_options;
   reshape_mover_options.reshape_of_1d_broadcast_is_cheap = true;
   spmd_simplify.AddPass(reshape_mover_options);
+  // Run AlgebraicSimplifier directly before HloConstantFolding, because we
+  // need to simplify DynamicSlice(Broadcast) away. Constant folding of
+  // DynamicSlice can be quite costly, as the whole operand will be evaluated.
+  // We run AlgebraicSimplifier as HloPassFix to make sure all simplifications
+  // have been done before running HloConstantFolding. This is necessary
+  // because simplifications create new instructions which may not be visited
+  // in the same iteration of AlgebraicSimplifier.
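+  // A hypothetical sketch of the HLO shape this targets (names and shapes
+  // illustrative, not taken from a real module):
+  //   %b = f32[1024,1024] broadcast(f32[] %c), dimensions={}
+  //   %s = f32[1,1] dynamic-slice(%b, %i, %j), dynamic_slice_sizes={1,1}
+  // The simplifier can rewrite %s into a broadcast of %c directly, whereas
+  // HloConstantFolding would first evaluate the entire %b operand.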
+ spmd_simplify.AddPass>( + layout_insensitive_algsimp_opts); spmd_simplify.AddPass(); spmd_simplify.AddPass(); From ac2c81c3d492420550bce77439ae5ebc77c39150 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 May 2024 23:19:02 -0700 Subject: [PATCH 092/478] Update ops-related pbtxt files. PiperOrigin-RevId: 633451656 --- tensorflow/core/ops/ops.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 53c362ed0ab819..68d78d0329a9ac 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -1,4 +1,4 @@ -go/debugstr +go/nodeserialize op { name: "Abort" attr { From f32e67e533bfe2d73c3658e1c2716bbc1b2c7baa Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 May 2024 00:11:15 -0700 Subject: [PATCH 093/478] Improve documentation of mlir ops. PiperOrigin-RevId: 633462768 --- .../tensorflow/ir/host_runtime/tfrt_ops.td | 20 +++++++------------ .../compiler/mlir/tfrt/ir/mlrt/tf_mlrt_ops.td | 19 ++++++------------ 2 files changed, 13 insertions(+), 26 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/host_runtime/tfrt_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/host_runtime/tfrt_ops.td index e3736e96851262..e46a6500dfd516 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/host_runtime/tfrt_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/host_runtime/tfrt_ops.td @@ -94,23 +94,17 @@ Empty strings indicate that they are non-partitioned tensors.}]>:$shape_and_slic def TF_IfrtLoadVariableOp : TF_Op<"IfrtLoadVariable", [Pure]> { - let summary = "Loads a restored variable tensor as an IFRT array and tensor future"; + let summary = "Loads a restored variable tensor as a tensor future"; let description = [{ - This op loads a variable tensor as an IFRT array and binds it with the specified name. + This op loads a restored variable tensor as a tensor future. It is a + replacement of `tf.ReadVariableOp`. - This op is an replacement of `tf.ReadVariableOp` in the case that a constant - variable tensor is an input to the tpu program invoked by `tf.IfrtCall`. + This op returns a scalar string tensor containing the restored variable name, which can be + used as a key within the runtime, as well as a future for the tensor. - After a `tf.ReadVariableOp` is lowered into `tf.IfrtLoadVariableOp`, the `tf.IfrtCall` kernel - will bind the loaded IFRT array by name with the tpu program's input. - - `tf.IfrtLoadVariableOp` converts the tensor into an IFRT array based on device and sharding - configuration specified in `VariableDeviceShardingConfigProto`. - - This op returns a scalar string tensor containing the loaded variable name, which can be - used as a key to look for the loaded IFRT array in runtime and a restored tensor, which - maybe lowered to a future by runtime. + The `tf.IfrtCall` kernel uses the output $array_key. + Other ops executed by TFRT may make use of $tensor_future. 
}]; // TODO(b/339423851) Redefine the IfrtLoadVariableOp as it doesn't require the diff --git a/tensorflow/compiler/mlir/tfrt/ir/mlrt/tf_mlrt_ops.td b/tensorflow/compiler/mlir/tfrt/ir/mlrt/tf_mlrt_ops.td index ae4fa4be4f78c9..0659143f49b39b 100644 --- a/tensorflow/compiler/mlir/tfrt/ir/mlrt/tf_mlrt_ops.td +++ b/tensorflow/compiler/mlir/tfrt/ir/mlrt/tf_mlrt_ops.td @@ -449,21 +449,14 @@ def IfrtLoadVariableOp: TensorflowMlrt_Op<"ifrt_load_variable", [Pure]> { let summary = "Loads a variable tensor as an IFRT array for mlrt"; let description = [{ - This is the MLRT version of tf.IfrtLoadVariableOp. + This op loads a restored variable tensor as a tensor future. It is a + replacement of `tf.ReadVariableOp`. - This op loads a variable tensor as an IFRT array and binds it with the specified name. + This op returns a scalar string tensor containing the restored variable name, which can be + used as a key within the runtime, as well as a future for the tensor. - This op is an replacement of `tf.ReadVariableOp` in the case that a constant - variable tensor is an input to the tpu program invoked by `tf.IfrtCall`. - - After a `tf.ReadVariableOp` is lowered into `tf.IfrtLoadVariableOp`, the `tf.IfrtCall` kernel - will bind the loaded IFRT array by name with the tpu program's input. - - `tf.IfrtLoadVariableOp` converts the tensor into an IFRT array based on device and sharding - configuration specified in `VariableDeviceShardingConfigProto`. - - This op returns a scalar string tensor as a key for user to look for the loaded array - and a future containing the restored tensor. + The `tf.IfrtCall` kernel uses the output $array_key. + Other ops executed by TFRT may make use of $tensor_future. }]; let arguments = (ins From 24fb166319aec315431bb4fc826c1bfa3a9da43a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 May 2024 01:17:37 -0700 Subject: [PATCH 094/478] Update ops-related pbtxt files. PiperOrigin-RevId: 633477018 --- tensorflow/core/ops/ops.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 68d78d0329a9ac..5019fe2a5b2fad 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -1,4 +1,4 @@ -go/nodeserialize +go/debugproto op { name: "Abort" attr { From d83f8918629653a50d239388a84adc676620bc1c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 May 2024 01:19:17 -0700 Subject: [PATCH 095/478] Automated Code Change PiperOrigin-RevId: 633477369 --- tensorflow/lite/delegates/gpu/common/data_type_test.cc | 1 - tensorflow/lite/delegates/gpu/common/flops_util.cc | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/lite/delegates/gpu/common/data_type_test.cc b/tensorflow/lite/delegates/gpu/common/data_type_test.cc index a88eb38047eda8..f7ee35c73145dd 100644 --- a/tensorflow/lite/delegates/gpu/common/data_type_test.cc +++ b/tensorflow/lite/delegates/gpu/common/data_type_test.cc @@ -15,7 +15,6 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/common/data_type.h" -#include #include namespace tflite { diff --git a/tensorflow/lite/delegates/gpu/common/flops_util.cc b/tensorflow/lite/delegates/gpu/common/flops_util.cc index 88d7edad88fc57..3807f6b6a46274 100644 --- a/tensorflow/lite/delegates/gpu/common/flops_util.cc +++ b/tensorflow/lite/delegates/gpu/common/flops_util.cc @@ -15,6 +15,8 @@ limitations under the License. 
#include "tensorflow/lite/delegates/gpu/common/flops_util.h" +#include "tensorflow/lite/delegates/gpu/common/shape.h" + namespace tflite { namespace gpu { From 16244310e5a73e8f187d6dab487fee0e80d4afd9 Mon Sep 17 00:00:00 2001 From: TJ Xu Date: Tue, 14 May 2024 01:48:26 -0700 Subject: [PATCH 096/478] PR #11563: [NVIDIA GPU] Improve GPU collective matmul to support all-gather having multiple users Imported from GitHub PR https://github.com/openxla/xla/pull/11563 We have identified another optimization opportunity for gpt-3 using collective matmul, in the backward pass, the all-gather has multiple dot users but current spmd will duplicate multiple collective matmul loops. We'd like this transformation: before: ``` // input // / | // / | // AG windowed loop // / // / // dot ``` after: ``` // input // | // | // windowed loop // | // | // dot ``` This is advantageous since the chained dot can fully utilize all the resource on the GPU while comm is hidden by the first collective matmul loop. We introduced an option to turn off CM loop duplication in SPMD and rewrite the graph to desired pattern in the gpu_windowed_einsum_handler pass. Copybara import of the project: -- 986ac94ab44d31f6d11ec6f135f6cfb2e5636d80 by TJ : Moved most of changes to gpu pass -- 44e81df91c235cac635f334c89d1d8a117ac6511 by TJ : Added e2e test for windowed einsum Minimized unit test hlo -- 8fc24a479de7515f532f36de8ffbcce49516c154 by TJ : Added explanations for spmd tests and dot_handler to skip multiple consumers -- 142d84d54db2b6291484443e43913d86c44a485c by TJ : move windowed einsum test to stateful_rng_spmd_partitioner_test -- 8b9fc43746136b40a814d93bf8086a687490fd7f by TJ : Changed e2e test back to include reducescatter Merging this change closes #11563 PiperOrigin-RevId: 633483864 --- third_party/xla/xla/service/gpu/BUILD | 2 + .../xla/xla/service/gpu/gpu_compiler.cc | 4 +- .../gpu/gpu_windowed_einsum_handler.cc | 162 ++++++++++++++++++ .../service/gpu/gpu_windowed_einsum_handler.h | 10 +- .../gpu/gpu_windowed_einsum_handler_test.cc | 95 ++++++++++ .../xla/xla/service/spmd/dot_handler.cc | 61 +++++-- .../xla/xla/service/spmd/spmd_partitioner.h | 4 + .../spmd/stateful_rng_spmd_partitioner.h | 27 ++- .../stateful_rng_spmd_partitioner_test.cc | 60 ++++++- .../xla/xla/tests/collective_ops_test_e2e.cc | 80 +++++++++ 10 files changed, 479 insertions(+), 26 deletions(-) diff --git a/third_party/xla/xla/service/gpu/BUILD b/third_party/xla/xla/service/gpu/BUILD index 34f2210e1d1b04..5faf098331bb14 100644 --- a/third_party/xla/xla/service/gpu/BUILD +++ b/third_party/xla/xla/service/gpu/BUILD @@ -6070,6 +6070,8 @@ xla_cc_test( "//xla:util", "//xla:xla_data_proto_cc", "//xla/hlo/ir:hlo", + "//xla/service:pattern_matcher", + "//xla/service:pattern_matcher_gmock", "//xla/tests:hlo_test_base", "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/strings:string_view", diff --git a/third_party/xla/xla/service/gpu/gpu_compiler.cc b/third_party/xla/xla/service/gpu/gpu_compiler.cc index a4086262b96a7a..6c3e2e70e05422 100644 --- a/third_party/xla/xla/service/gpu/gpu_compiler.cc +++ b/third_party/xla/xla/service/gpu/gpu_compiler.cc @@ -640,7 +640,9 @@ absl::Status RunSPMDPasses( .xla_gpu_threshold_for_windowed_einsum_mib(), hlo_module->config() .debug_options() - .xla_gpu_multi_streamed_windowed_einsum()); + .xla_gpu_multi_streamed_windowed_einsum(), + /*skip_checking_windowed_einsum_users=*/true, + /*disable_ag_rewrite_for_multiple_consumers=*/true); spmd_pipeline.AddPass(); return spmd_pipeline.Run(hlo_module).status(); } 
else {
diff --git a/third_party/xla/xla/service/gpu/gpu_windowed_einsum_handler.cc b/third_party/xla/xla/service/gpu/gpu_windowed_einsum_handler.cc
index fa5339fc007f44..875d4d5eed8bfb 100644
--- a/third_party/xla/xla/service/gpu/gpu_windowed_einsum_handler.cc
+++ b/third_party/xla/xla/service/gpu/gpu_windowed_einsum_handler.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include "absl/container/flat_hash_set.h"
 #include "absl/status/status.h"
 #include "absl/strings/string_view.h"
+#include "xla/hlo/ir/dfs_hlo_visitor_with_default.h"
 #include "xla/hlo/ir/hlo_computation.h"
 #include "xla/hlo/ir/hlo_instruction.h"
 #include "xla/hlo/ir/hlo_module.h"
@@ -142,6 +143,136 @@ absl::StatusOr HandleAgWindowedEinsumLoop(HloComputation* comp,
   return changed;
 }
 
+absl::Status ProcessWindowedEinsumLoopForActivationCaching(
+    GpuWindowedEinsumHandler::WindowedEinsumAgLoops& ag_loop) {
+  HloInstruction* loop = ag_loop.loop;
+  // Transform the while body to cache the all-gathered result in the
+  // output buffer to be consumed by the dot.
+  HloComputation* while_body = loop->while_body();
+  HloInstruction* input_gte;
+  for (HloInstruction* gte : while_body->parameter_instruction(0)->users()) {
+    if (gte->tuple_index() == 0) {
+      input_gte = gte;
+    }
+  }
+  // Get the output operand of the full buffer.
+  HloInstruction* root = while_body->root_instruction();
+  // The full buffer that we will use to cache the accumulated activation
+  // is the 4th operand in the output tuple.
+  int64_t full_cache_buffer_index = 3;
+  HloInstruction* full_buffer_output_gte =
+      root->mutable_operand(full_cache_buffer_index);
+  HloInstruction* new_full_buffer_output;
+  // Find the DUS in the loop body and re-use the slice indices.
+  // This should just be a constant(0).
+  HloInstruction* dus_boundary_constant;
+  for (HloInstruction* inst : while_body->MakeInstructionPostOrder()) {
+    HloInstruction* slice_indices;
+    // If we have a DUS(PARAM,DS) pattern, we need to update the output
+    // buffer with the first slice.
+    if (Match(inst,
+              m::DynamicUpdateSlice(
+                  m::GetTupleElement(m::Parameter()), m::Op(),
+                  m::Constant(&dus_boundary_constant),
+                  m::Reshape(m::DynamicSlice(&slice_indices, m::Op(), m::Op())),
+                  m::Op()))) {
+      slice_indices = while_body->AddInstruction(HloInstruction::CreateReshape(
+          dus_boundary_constant->shape(), slice_indices));
+      VLOG(5) << "Created slice op for first slice: "
+              << slice_indices->ToString();
+      full_buffer_output_gte =
+          while_body->AddInstruction(HloInstruction::CreateDynamicUpdateSlice(
+              full_buffer_output_gte->shape(), full_buffer_output_gte,
+              input_gte,
+              {dus_boundary_constant, slice_indices, dus_boundary_constant}));
+    }
+    // If we have a DUS(DUS,DS) pattern, then the einsum loop is
+    // unrolled, so we need to update the output buffer again with the
+    // second slice. Since the second slice will have different indices,
+    // we need to re-capture slice_indices.
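+    // Sketch of the unrolled pattern matched below (names hypothetical, based
+    // on the unit-test HLO later in this change):
+    //   %dus0 = dynamic-update-slice(%param_gte, %dot0, %c0, %idx0, %c0)
+    //   %dus1 = dynamic-update-slice(%dus0, %dot1, %c0,
+    //                                reshape(dynamic-slice(...)), %c0)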
+    if (Match(inst,
+              m::DynamicUpdateSlice(
+                  m::DynamicUpdateSlice(), m::Op(), m::Constant(),
+                  m::Reshape(m::DynamicSlice(&slice_indices, m::Op(), m::Op())),
+                  m::Op()))) {
+      slice_indices = while_body->AddInstruction(HloInstruction::CreateReshape(
+          dus_boundary_constant->shape(), slice_indices));
+      VLOG(5) << "Created slice op for second slice: "
+              << slice_indices->ToString();
+      // The slice we need this time is the output of the first
+      // collective-permute.
+      HloInstruction* cp_output;
+      for (HloInstruction* gte_user : input_gte->users()) {
+        if (gte_user->opcode() == HloOpcode::kCollectivePermute) {
+          cp_output = gte_user;
+          break;
+        }
+      }
+      new_full_buffer_output =
+          while_body->AddInstruction(HloInstruction::CreateDynamicUpdateSlice(
+              full_buffer_output_gte->shape(), full_buffer_output_gte,
+              cp_output,
+              {dus_boundary_constant, slice_indices, dus_boundary_constant}));
+    }
+  }
+  TF_RETURN_IF_ERROR(root->ReplaceOperandWith(full_cache_buffer_index,
+                                              new_full_buffer_output));
+  return OkStatus();
+}
+
+class WindowedEinsumVisitor : public DfsHloRewriteVisitor {
+ public:
+  explicit WindowedEinsumVisitor(
+      std::vector&
+          all_ag_loops)
+      : all_ag_loops_(all_ag_loops) {}
+  // Rewrites an all-gather->dot pattern that shares the same operand
+  // with a windowed einsum loop to consume the output of the loop
+  // and remove the all-gather.
+  absl::Status HandleDot(HloInstruction* dot) override {
+    CHECK_EQ(dot->opcode(), HloOpcode::kDot);
+    for (GpuWindowedEinsumHandler::WindowedEinsumAgLoops& ag_loop :
+         all_ag_loops_) {
+      HloInstruction* loop = ag_loop.loop;
+      HloInstruction* ag_operand = nullptr;
+
+      if (Match(dot, m::Dot(m::AllGather(&ag_operand), m::Op())) ||
+          Match(dot, m::Dot(m::Op(), m::AllGather(&ag_operand)))) {
+        HloInstruction* windowed_lhs =
+            loop->mutable_operand(0)->mutable_operand(0);
+        HloInstruction* ag_with_shared_operand = nullptr;
+        if (ag_operand && ag_operand->mutable_operand(0) == windowed_lhs) {
+          ag_with_shared_operand = ag_operand;
+        }
+
+        if (!ag_with_shared_operand) {
+          continue;
+        }
+
+        VLOG(5) << "Found all-gather that shares the same operand with a "
+                   "windowed einsum loop : "
+                << loop->ToString();
+        int64_t cache_output_index = dot->operand_index(ag_with_shared_operand);
+        HloComputation* comp = dot->parent();
+        HloInstruction* new_gte = comp->AddInstruction(
+            HloInstruction::CreateGetTupleElement(loop, 3));
+        TF_RETURN_IF_ERROR(
+            dot->ReplaceOperandWith(cache_output_index, new_gte));
+        TF_RETURN_IF_ERROR(comp->RemoveInstruction(ag_with_shared_operand));
+        if (!ag_loop.consumed) {
+          TF_RETURN_IF_ERROR(
+              ProcessWindowedEinsumLoopForActivationCaching(ag_loop));
+          ag_loop.consumed = true;
+        }
+      }
+    }
+    return absl::OkStatus();
+  }
+
+ private:
+  std::vector& all_ag_loops_;
+};
+
 } // namespace
 
 absl::StatusOr GpuWindowedEinsumHandler::Run(
@@ -163,9 +294,40 @@ absl::StatusOr GpuWindowedEinsumHandler::Run(
       VLOG(5) << "Processing computation: " << comp->name();
       TF_ASSIGN_OR_RETURN(bool comp_result,
                           HandleAgWindowedEinsumLoop(comp, stream_id));
+      all_ag_loops_.push_back(
+          WindowedEinsumAgLoops(comp->WhileCallInstruction()));
       changed = comp_result;
     }
   }
+  // Now that we have processed all loops, we can check if there are any
+  // allgather-dot patterns that we can optimize. We'd want to transform:
+  //          input
+  //         /    |
+  //        /     |
+  //      AG    windowed loop
+  //      /
+  //     /
+  //   dot
+  // to:
+  //          input
+  //            |
+  //            |
+  //      windowed loop
+  //            |
+  //            |
+  //           dot
+  // The windowed einsum loop will also be rewritten to output the full input to
+  // be consumed by the dot.
+ // This is advantageous since the chained dot can fully utilize all the + // resources on the GPU while comm is hidden by the first collective matmul + // loop. + for (HloComputation* comp : + module->MakeNonfusionComputations(execution_threads)) { + WindowedEinsumVisitor visitor(all_ag_loops_); + TF_RETURN_IF_ERROR(comp->Accept(&visitor)); + changed |= visitor.changed(); + } + XLA_VLOG_LINES( 5, "GpuWindowedEinsumHandler::Run(), after:\n" + module->ToString()); return changed; diff --git a/third_party/xla/xla/service/gpu/gpu_windowed_einsum_handler.h b/third_party/xla/xla/service/gpu/gpu_windowed_einsum_handler.h index 87ec1474d576f7..8f42cbc00fb3ab 100644 --- a/third_party/xla/xla/service/gpu/gpu_windowed_einsum_handler.h +++ b/third_party/xla/xla/service/gpu/gpu_windowed_einsum_handler.h @@ -38,16 +38,24 @@ class GpuWindowedEinsumHandler : public HloModulePass { return "gpu-windowed-einsum-handler"; } + struct WindowedEinsumAgLoops { + WindowedEinsumAgLoops(HloInstruction* loop) : loop(loop) {} + HloInstruction* loop; + bool consumed = false; + }; + using HloPassInterface::Run; absl::StatusOr Run( HloModule* module, const absl::flat_hash_set& execution_threads) override; - private: constexpr static const char* kWindowedEinsumRsLoopName = "windowed_dot_general_body_rs"; constexpr static const char* kWindowedEinsumAgLoopName = "windowed_dot_general_body_ag"; + + private: + std::vector all_ag_loops_; }; } // namespace xla::gpu diff --git a/third_party/xla/xla/service/gpu/gpu_windowed_einsum_handler_test.cc b/third_party/xla/xla/service/gpu/gpu_windowed_einsum_handler_test.cc index c70fbf2b08d126..b0bce884392253 100644 --- a/third_party/xla/xla/service/gpu/gpu_windowed_einsum_handler_test.cc +++ b/third_party/xla/xla/service/gpu/gpu_windowed_einsum_handler_test.cc @@ -23,12 +23,16 @@ limitations under the License. 
#include "xla/hlo/ir/hlo_instruction.h" #include "xla/hlo/ir/hlo_module.h" #include "xla/service/gpu/backend_configs.pb.h" +#include "xla/service/pattern_matcher.h" +#include "xla/service/pattern_matcher_gmock.h" #include "xla/tests/hlo_test_base.h" #include "tsl/platform/statusor.h" namespace xla::gpu { namespace { +namespace m = ::xla::match; + using GpuWindowedEinsumHanlderTest = HloTestBase; HloInstruction* FindInstructionByName(HloComputation* comp, std::string name) { @@ -193,5 +197,96 @@ ENTRY main.9_spmd { cp1->backend_config()->force_earliest_schedule()); } +TEST_F(GpuWindowedEinsumHanlderTest, AgLoopsMultipleConsumersAreChained) { + constexpr absl::string_view kHloString = R"( +HloModule pjit__unnamed_wrapped_function_, entry_computation_layout={(bf16[2,512,24576]{2,1,0}, bf16[24576,24576]{1,0}, bf16[24576,24576]{1,0})->bf16[2,2048,24576]{2,1,0}}, num_partitions=4 + +windowed_dot_general_body_ag { + param.1 = (bf16[2,512,24576]{2,1,0}, bf16[24576,24576]{1,0}, bf16[2,2048,24576]{2,1,0}, bf16[2,2048,24576]{2,1,0}, u32[]) parameter(0) + get-tuple-element.1 = bf16[2,512,24576]{2,1,0} get-tuple-element(param.1), index=0 + collective-permute = bf16[2,512,24576]{2,1,0} collective-permute(get-tuple-element.1), channel_id=2, source_target_pairs={{0,3},{1,0},{2,1},{3,2}} + collective-permute.1 = bf16[2,512,24576]{2,1,0} collective-permute(collective-permute), channel_id=3, source_target_pairs={{0,3},{1,0},{2,1},{3,2}} + get-tuple-element.2 = bf16[24576,24576]{1,0} get-tuple-element(param.1), index=1 + get-tuple-element.3 = bf16[2,2048,24576]{2,1,0} get-tuple-element(param.1), index=2 + dot = bf16[2,512,24576]{2,1,0} dot(get-tuple-element.1, get-tuple-element.2), lhs_contracting_dims={2}, rhs_contracting_dims={0} + constant.2 = s32[] constant(0) + constant.3 = s32[4]{0} constant({0, 512, 1024, 1536}) + get-tuple-element.5 = u32[] get-tuple-element(param.1), index=4 + partition-id = u32[] partition-id() + add = u32[] add(get-tuple-element.5, partition-id) + constant.1 = u32[] constant(4) + remainder = u32[] remainder(add, constant.1) + dynamic-slice = s32[1]{0} dynamic-slice(constant.3, remainder), dynamic_slice_sizes={1} + reshape = s32[] reshape(dynamic-slice) + dynamic-update-slice = bf16[2,2048,24576]{2,1,0} dynamic-update-slice(get-tuple-element.3, dot, constant.2, reshape, constant.2) + dot.1 = bf16[2,512,24576]{2,1,0} dot(collective-permute, get-tuple-element.2), lhs_contracting_dims={2}, rhs_contracting_dims={0} + constant.5 = u32[] constant(1) + add.1 = u32[] add(get-tuple-element.5, constant.5) + add.2 = u32[] add(add.1, partition-id) + remainder.1 = u32[] remainder(add.2, constant.1) + dynamic-slice.1 = s32[1]{0} dynamic-slice(constant.3, remainder.1), dynamic_slice_sizes={1} + reshape.1 = s32[] reshape(dynamic-slice.1) + dynamic-update-slice.1 = bf16[2,2048,24576]{2,1,0} dynamic-update-slice(dynamic-update-slice, dot.1, constant.2, reshape.1, constant.2) + get-tuple-element.4 = bf16[2,2048,24576]{2,1,0} get-tuple-element(param.1), index=3 + add.3 = u32[] add(add.1, constant.5) + ROOT tuple = (bf16[2,512,24576]{2,1,0}, bf16[24576,24576]{1,0}, bf16[2,2048,24576]{2,1,0}, bf16[2,2048,24576]{2,1,0}, u32[]) tuple(collective-permute.1, get-tuple-element.2, dynamic-update-slice.1, get-tuple-element.4, add.3) +} // windowed_dot_general_body_ag + +windowed_dot_general_cond_ag { + param = (bf16[2,512,24576]{2,1,0}, bf16[24576,24576]{1,0}, bf16[2,2048,24576]{2,1,0}, bf16[2,2048,24576]{2,1,0}, u32[]) parameter(0) + get-tuple-element = u32[] get-tuple-element(param), index=4 + constant = u32[] 
constant(4)
  ROOT compare = pred[] compare(get-tuple-element, constant), direction=LT
}

ENTRY main.12_spmd {
  param.4 = bf16[2,512,24576]{2,1,0} parameter(0), sharding={devices=[1,4,1]<=[4]}
  param.5 = bf16[24576,24576]{1,0} parameter(1), sharding={devices=[1,4]<=[4]}
  constant.22 = bf16[] constant(0)
  broadcast = bf16[2,2048,24576]{2,1,0} broadcast(constant.22), dimensions={}
  constant.24 = u32[] constant(0)
  tuple.2 = (bf16[2,512,24576]{2,1,0}, bf16[24576,24576]{1,0}, bf16[2,2048,24576]{2,1,0}, bf16[2,2048,24576]{2,1,0}, u32[]) tuple(param.4, param.5, broadcast, broadcast, constant.24)
  while = (bf16[2,512,24576]{2,1,0}, bf16[24576,24576]{1,0}, bf16[2,2048,24576]{2,1,0}, bf16[2,2048,24576]{2,1,0}, u32[]) while(tuple.2), condition=windowed_dot_general_cond_ag, body=windowed_dot_general_body_ag
  get-tuple-element.13 = bf16[2,2048,24576]{2,1,0} get-tuple-element(while), index=2
  copy.1 = bf16[2,2048,24576]{2,1,0} copy(get-tuple-element.13)
  all-gather = bf16[2,2048,24576]{2,1,0} all-gather(param.4), channel_id=1, replica_groups={{0,1,2,3}}, dimensions={1}, use_global_device_ids=true
  param.6 = bf16[24576,24576]{1,0} parameter(2), sharding={devices=[1,4]<=[4]}
  ROOT dot.7 = bf16[2,2048,24576]{2,1,0} dot(all-gather, param.6), lhs_contracting_dims={2}, rhs_contracting_dims={0}
}
)";

  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module,
                          ParseAndReturnVerifiedModule(kHloString));

  GpuWindowedEinsumHandler gpu_handler;
  bool changed;
  TF_ASSERT_OK_AND_ASSIGN(changed, gpu_handler.Run(module.get()));
  EXPECT_TRUE(changed);

  HloInstruction* ag_loop =
      FindInstructionByName(module->entry_computation(), "while");
  HloInstruction* inst =
      FindInstructionByName(module->entry_computation(), "dot.7");
  // dot.7 should now consume output of the windowed einsum while loop.
  EXPECT_EQ(inst->operand(0)->opcode(), HloOpcode::kGetTupleElement);
  EXPECT_EQ(inst->operand(0)->tuple_index(), 3);
  EXPECT_EQ(inst->operand(0)->operand(0), ag_loop);

  // while loop's root should now have a chain of DUS.
  HloInstruction* ag_while_root = ag_loop->while_body()->root_instruction();
  EXPECT_THAT(ag_while_root,
              GmockMatch(m::Tuple(
                  m::Op(), m::Op(), m::Op(),
                  m::DynamicUpdateSlice(
                      m::DynamicUpdateSlice(
                          m::GetTupleElement(m::Parameter())
                              .WithPredicate([](const HloInstruction* instr) {
                                return instr->tuple_index() == 3;
                              }),
                          m::Op(), m::Op(), m::Op(), m::Op()),
                      m::Op(), m::Op(), m::Op(), m::Op()),
                  m::Op())));
}

} // namespace
} // namespace xla::gpu
diff --git a/third_party/xla/xla/service/spmd/dot_handler.cc b/third_party/xla/xla/service/spmd/dot_handler.cc
index 8115a78794672d..47909ff72a5801 100644
--- a/third_party/xla/xla/service/spmd/dot_handler.cc
+++ b/third_party/xla/xla/service/spmd/dot_handler.cc
@@ -1909,22 +1909,57 @@ absl::StatusOr PartitionBaseCase(
     }
   }
 
+  // If we see a dot that shares the same operand with a windowed einsum ag loop
+  // and disable_ag_rewrite_for_multiple_consumers is true, we skip rewriting
+  // the current dot. We also skip any reshape operand as long as the lhs or
+  // rhs of the dot is its only user, since reshape ops won't change the
+  // functional meaning of the pattern.
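+  // Illustrative case: if %x feeds an already-rewritten windowed einsum loop
+  // as well as dot(reshape(all-gather(%x)), %w), the second dot is left
+  // untouched here; a backend pass (the GPU windowed einsum handler above)
+  // can later rewire it to consume the loop's cached output.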
+  auto has_reshape_operand = [](PartitionedHlo& hlo) -> bool {
+    return hlo.hlo()->opcode() == HloOpcode::kReshape ||
+           hlo.hlo()->opcode() == HloOpcode::kBitcast ||
+           hlo.hlo()->opcode() == HloOpcode::kTranspose;
+  };
+  bool should_skip_windowed_einsum = false;
+  if (options.disable_ag_rewrite_for_multiple_consumers) {
+    auto lhs_operand =
+        has_reshape_operand(lhs) ? lhs.hlo()->operand(0) : lhs.hlo();
+    auto rhs_operand =
+        has_reshape_operand(rhs) ? rhs.hlo()->operand(0) : rhs.hlo();
+    for (auto loop : *windowed_dot_general_loops) {
+      if (loop.while_loop->while_body()->name().find(
+              "windowed_dot_general_body_ag") == 0) {
+        auto cm_lhs = loop.while_loop->operand(0)->operand(0);
+        if (cm_lhs == lhs_operand || cm_lhs == rhs_operand) {
+          VLOG(2) << "Skip processing: " << original_hlo->ToString();
+          VLOG(2) << "It shares the same operand with "
+                  << loop.while_loop->ToString()
+                  << " and disable_ag_rewrite_for_multiple_consumers is set to "
+                     "true.";
+          should_skip_windowed_einsum = true;
+        }
+      }
+    }
+  }
+
   // Hard limit on iteration count based on empirical data (above this amount
   // there's pretty significant overhead).
   constexpr int64_t kMaxIterations = 32;
-  std::optional e_config = GetWindowedEinsumConfiguration(
-      num_partitions, output_lhs_non_contracting_partitions,
-      output_rhs_non_contracting_partitions, rhs_contracting_partitions,
-      rhs_non_contracting_partitions, rhs_batch_partitions,
-      lhs_contracting_partitions, lhs_non_contracting_partitions,
-      lhs_batch_partitions, ShapeSizeInBytes(rhs.base_shape()),
-      ShapeSizeInBytes(lhs.base_shape()), ShapeSizeInBytes(output_base_shape),
-      options, output_sharding_transposed_to_match_lhs,
-      output_sharding_transposed_to_match_rhs,
-      lhs_sharding_transposed_to_match_rhs,
-      rhs_sharding_transposed_to_match_lhs, lhs_sharding, rhs_sharding,
-      conv_window, dims_mapping, visitor->call_graph(), kMaxIterations,
-      original_hlo, &lhs, &rhs, create_sharded_dot, b, module, visitor);
+  std::optional e_config = std::nullopt;
+  if (!should_skip_windowed_einsum) {
+    e_config = GetWindowedEinsumConfiguration(
+        num_partitions, output_lhs_non_contracting_partitions,
+        output_rhs_non_contracting_partitions, rhs_contracting_partitions,
+        rhs_non_contracting_partitions, rhs_batch_partitions,
+        lhs_contracting_partitions, lhs_non_contracting_partitions,
+        lhs_batch_partitions, ShapeSizeInBytes(rhs.base_shape()),
+        ShapeSizeInBytes(lhs.base_shape()), ShapeSizeInBytes(output_base_shape),
+        options, output_sharding_transposed_to_match_lhs,
+        output_sharding_transposed_to_match_rhs,
+        lhs_sharding_transposed_to_match_rhs,
+        rhs_sharding_transposed_to_match_lhs, lhs_sharding, rhs_sharding,
+        conv_window, dims_mapping, visitor->call_graph(), kMaxIterations,
+        original_hlo, &lhs, &rhs, create_sharded_dot, b, module, visitor);
+  }
   if (e_config) {
     VLOG(2) << "Emit windowed dot.";
     return EmitWindowedDotGeneral(
diff --git a/third_party/xla/xla/service/spmd/spmd_partitioner.h b/third_party/xla/xla/service/spmd/spmd_partitioner.h
index 4a8dc50b8b7fec..c137a821c333a8 100644
--- a/third_party/xla/xla/service/spmd/spmd_partitioner.h
+++ b/third_party/xla/xla/service/spmd/spmd_partitioner.h
@@ -89,6 +89,10 @@ struct SpmdPartitionerOptions {
   bool enable_windowed_einsum_for_all_gather = true;
   // Enables windowed einsum for result reduce-scatter.
   bool enable_windowed_einsum_for_reduce_scatter = true;
+
+  // Whether to disable the rewrite for dots that share the same
+  // operand as an already rewritten windowed einsum loop.
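+  // For example, if an all-gather result feeds two dots, only the first dot
+  // becomes a windowed einsum loop; with this flag set, the second dot keeps
+  // its all-gather form so a later pass can point it at the loop's output
+  // instead of duplicating the loop.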
+ bool disable_ag_rewrite_for_multiple_consumers = false; }; // Class to wrap the computation builder to capture information during SPMD diff --git a/third_party/xla/xla/service/spmd/stateful_rng_spmd_partitioner.h b/third_party/xla/xla/service/spmd/stateful_rng_spmd_partitioner.h index 5d9170e67e6277..dd3e2635076a8a 100644 --- a/third_party/xla/xla/service/spmd/stateful_rng_spmd_partitioner.h +++ b/third_party/xla/xla/service/spmd/stateful_rng_spmd_partitioner.h @@ -45,13 +45,18 @@ class StatefulRngSpmdPartitioningVisitor class StatefulRngSpmdPartitioner : public spmd::SpmdPartitioner { public: - StatefulRngSpmdPartitioner(int64_t num_partitions, int64_t num_replicas, - int64_t threshold_for_windowed_einsum_mib = 100000, - bool windowed_einsum_use_multiple_streams = false) - : spmd::SpmdPartitioner( - num_partitions, num_replicas, - GetSpmdPartitionerOptions(threshold_for_windowed_einsum_mib, - windowed_einsum_use_multiple_streams)) {} + StatefulRngSpmdPartitioner( + int64_t num_partitions, int64_t num_replicas, + int64_t threshold_for_windowed_einsum_mib = 100000, + bool windowed_einsum_use_multiple_streams = false, + bool skip_checking_windowed_einsum_users = false, + bool disable_ag_rewrite_for_multiple_consumers = false) + : spmd::SpmdPartitioner(num_partitions, num_replicas, + GetSpmdPartitionerOptions( + threshold_for_windowed_einsum_mib, + windowed_einsum_use_multiple_streams, + skip_checking_windowed_einsum_users, + disable_ag_rewrite_for_multiple_consumers)) {} protected: std::unique_ptr CreateVisitor( @@ -70,12 +75,18 @@ class StatefulRngSpmdPartitioner : public spmd::SpmdPartitioner { private: static spmd::SpmdPartitionerOptions GetSpmdPartitionerOptions( int64_t threshold_for_windowed_einsum_mib, - bool windowed_einsum_use_multiple_streams = false) { + bool windowed_einsum_use_multiple_streams = false, + bool skip_checking_windowed_einsum_users = false, + bool disable_ag_rewrite_for_multiple_consumers = false) { spmd::SpmdPartitionerOptions options; options.allow_module_signature_change = true; options.threshold_for_windowed_einsum_mib = threshold_for_windowed_einsum_mib; options.unroll_windowed_einsum = windowed_einsum_use_multiple_streams; + options.skip_checking_windowed_einsum_users = + skip_checking_windowed_einsum_users; + options.disable_ag_rewrite_for_multiple_consumers = + disable_ag_rewrite_for_multiple_consumers; return options; } }; diff --git a/third_party/xla/xla/service/spmd/stateful_rng_spmd_partitioner_test.cc b/third_party/xla/xla/service/spmd/stateful_rng_spmd_partitioner_test.cc index d301f0ccdcdd74..0ae7efe0daf4f4 100644 --- a/third_party/xla/xla/service/spmd/stateful_rng_spmd_partitioner_test.cc +++ b/third_party/xla/xla/service/spmd/stateful_rng_spmd_partitioner_test.cc @@ -34,11 +34,25 @@ namespace xla { namespace spmd { namespace { +int64_t CountInstructions(const HloComputation &computation, HloOpcode opcode) { + int64_t count = 0; + for (const auto &instruction : computation.instructions()) { + if (instruction->opcode() == opcode) { + count++; + } + } + return count; +} + class StatefulRngSpmdPartitionerTest : public HloTestBase { public: absl::StatusOr> PartitionComputation( absl::string_view hlo_module, int64_t num_partitions, - std::function add_passes = nullptr) { + std::function add_passes = nullptr, + int64_t threshold_for_windowed_einsum_mib = 1000000, + bool windowed_einsum_use_multiple_streams = false, + bool skip_checking_windowed_einsum_users = false, + bool disable_ag_rewrite_for_multiple_consumers = false) { TF_ASSIGN_OR_RETURN( auto 
module, ParseAndReturnVerifiedModule( hlo_module, GetModuleConfigForTest( @@ -51,8 +65,12 @@ class StatefulRngSpmdPartitionerTest : public HloTestBase { add_passes(pass); } pass.AddPass(/*is_spmd=*/true); - pass.AddPass(num_partitions, - /*num_replicas=*/1); + pass.AddPass( + num_partitions, + /*num_replicas=*/1, threshold_for_windowed_einsum_mib, + windowed_einsum_use_multiple_streams, + skip_checking_windowed_einsum_users, + disable_ag_rewrite_for_multiple_consumers); pass.AddPass(/*layout_sensitive=*/false, /*allow_mixed_precision=*/false); TF_RETURN_IF_ERROR(pass.Run(module.get()).status()); @@ -116,6 +134,42 @@ ENTRY entry { VerifyNoAllReduce(module.get()); } +TEST_F(StatefulRngSpmdPartitionerTest, + EinsumDisableRewriteForAgWithMultipleConsumers) { + absl::string_view hlo_string = R"( +HloModule test, entry_computation_layout={(bf16[2,2048,24576]{2,1,0}, bf16[24576,98304]{1,0}, bf16[24576,98304]{1,0})->bf16[2,2048,98304]{2,1,0}}, num_partitions=4 + +ENTRY main { + Arg_0.1 = bf16[2,2048,24576]{2,1,0} parameter(0), sharding={devices=[1,4,1]<=[4]} + Arg_1.2 = bf16[24576,98304]{1,0} parameter(1), sharding={devices=[1,4]<=[4]} + dot.5 = bf16[2,2048,98304]{2,1,0} dot(Arg_0.1, Arg_1.2), lhs_contracting_dims={2}, rhs_contracting_dims={0}, sharding={devices=[1,1,4]<=[4]} + Arg_2.3 = bf16[24576,98304]{1,0} parameter(2), sharding={devices=[1,4]<=[4]} + dot.6 = bf16[2,2048,98304]{2,1,0} dot(Arg_0.1, Arg_2.3), lhs_contracting_dims={2}, rhs_contracting_dims={0}, sharding={devices=[1,1,4]<=[4]} + ROOT add.8 = bf16[2,2048,98304]{2,1,0} add(dot.5, dot.6), sharding={devices=[1,1,4]<=[4]} +} + +)"; + // With disable_ag_rewrite_for_multiple_consumers set to true, we expect only + // 1 while loop to exist which is the rewritten windowed einsum loop for the + // first ag->dot pattern. The second dot which shares the same operand with + // the loop will remain as is. 
+ TF_ASSERT_OK_AND_ASSIGN( + auto module, PartitionComputation( + hlo_string, /*num_partitions=*/4, /*add_passes=*/nullptr, + /*threshold_for_windowed_einsum_mib=*/0, + /*windowed_einsum_use_multiple_streams=*/true, + /*skip_checking_windowed_einsum_users=*/true, + /*disable_ag_rewrite_for_multiple_consumers=*/true)); + XLA_VLOG_LINES(1, module->ToString()); + EXPECT_EQ(CountInstructions(*module->entry_computation(), HloOpcode::kWhile), + 1); + EXPECT_EQ(CountInstructions(*module->entry_computation(), HloOpcode::kDot), + 1); + EXPECT_EQ( + CountInstructions(*module->entry_computation(), HloOpcode::kAllGather), + 1); +} + TEST_F(StatefulRngSpmdPartitionerTest, VerifyThresholdSetCorrectly) { auto debug_options = HloTestBase::GetDebugOptionsForTest(); int64_t threshold = 400; diff --git a/third_party/xla/xla/tests/collective_ops_test_e2e.cc b/third_party/xla/xla/tests/collective_ops_test_e2e.cc index c6885ab13ee5be..d40ce579a41e06 100644 --- a/third_party/xla/xla/tests/collective_ops_test_e2e.cc +++ b/third_party/xla/xla/tests/collective_ops_test_e2e.cc @@ -647,5 +647,85 @@ TEST_F(CollectiveOpsTestE2E, NoAllToAllDecomposition) { LiteralTestUtil::ExpectR1Equal({20, 25, 21, 26}, results[1]); } +TEST_F(CollectiveOpsTestE2E, WindowedEinsumE2EAllgatherMultiConsumer) { + absl::string_view kModuleReplicatedStr = R"( +HloModule pjit__unnamed_wrapped_function_, entry_computation_layout={(bf16[2,16,48]{2,1,0}, bf16[48,192]{1,0}, bf16[48,192]{1,0}, bf16[192,48]{1,0})->bf16[2,16,48]{2,1,0}}, allow_spmd_sharding_propagation_to_parameters={false,false,false,false}, num_partitions=4 + +ENTRY main.12 { + Arg_0.1 = bf16[2,16,48]{2,1,0} parameter(0), sharding={devices=[1,4,1]<=[4]} + Arg_1.2 = bf16[48,192]{1,0} parameter(1), sharding={devices=[1,4]<=[4]} + dot.5 = bf16[2,16,192]{2,1,0} dot(Arg_0.1, Arg_1.2), lhs_contracting_dims={2}, rhs_contracting_dims={0} + custom-call.7 = bf16[2,16,192]{2,1,0} custom-call(dot.5), custom_call_target="Sharding", sharding={devices=[1,1,4]<=[4]} + Arg_2.3 = bf16[48,192]{1,0} parameter(2), sharding={devices=[1,4]<=[4]} + dot.6 = bf16[2,16,192]{2,1,0} dot(Arg_0.1, Arg_2.3), lhs_contracting_dims={2}, rhs_contracting_dims={0} + add.8 = bf16[2,16,192]{2,1,0} add(custom-call.7, dot.6) + Arg_3.4 = bf16[192,48]{1,0} parameter(3), sharding={devices=[4,1]<=[4]} + dot.9 = bf16[2,16,48]{2,1,0} dot(add.8, Arg_3.4), lhs_contracting_dims={2}, rhs_contracting_dims={0} + tuple.10 = (bf16[2,16,48]{2,1,0}) tuple(dot.9) + ROOT get-tuple-element.11 = bf16[2,16,48]{2,1,0} get-tuple-element(tuple.10), index=0, sharding={devices=[1,4,1]<=[4]} +} // main.12 +)"; + + const int64_t kNumReplicas = 1; + const int64_t kNumPartitions = 4; + + HloModuleConfig config = + GetModuleConfigForTest(/*replica_count=*/kNumReplicas); + auto opts = GetDebugOptionsForTest(); + opts.set_xla_gpu_threshold_for_windowed_einsum_mib(0); + opts.set_xla_gpu_multi_streamed_windowed_einsum(true); + opts.set_xla_gpu_graph_min_graph_size(200); + opts.set_xla_gpu_enable_triton_gemm(false); + config.set_debug_options(opts); + config.set_num_partitions(kNumPartitions); + TF_ASSERT_OK_AND_ASSIGN( + auto module, ParseAndReturnVerifiedModule(kModuleReplicatedStr, config)); + DeviceAssignment assn(/*replica_count=*/kNumReplicas, + /*computation_count=*/kNumPartitions); + config.set_replica_count(kNumReplicas); + for (int64_t i = 0; i < kNumPartitions; ++i) { + assn(0, i) = i; + } + + auto fake_arguments = xla::MakeFakeArguments(module.get()).value(); + std::vector fake_ptrs(fake_arguments.size()); + for (int i = 0; i < 
fake_arguments.size(); i++) { + fake_ptrs[i] = &fake_arguments[i]; + } + + TF_ASSERT_OK_AND_ASSIGN( + std::vector results, + HloTestBase::ExecuteReplicated( + std::move(module), fake_ptrs, kNumPartitions, &assn, + true /*run_hlo_passes*/, true /*use-threads*/)); + ASSERT_EQ(results.size(), kNumPartitions); + HloModuleConfig ref_config = + GetModuleConfigForTest(/*replica_count=*/kNumReplicas); + auto ref_opts = GetDebugOptionsForTest(); + ref_opts.set_xla_gpu_graph_min_graph_size(200); + ref_opts.set_xla_gpu_enable_triton_gemm(false); + ref_config.set_debug_options(ref_opts); + ref_config.set_num_partitions(kNumPartitions); + TF_ASSERT_OK_AND_ASSIGN( + auto ref_module, + ParseAndReturnVerifiedModule(kModuleReplicatedStr, ref_config)); + auto fake_ref_arguments = xla::MakeFakeArguments(ref_module.get()).value(); + std::vector ref_fake_ptrs(fake_ref_arguments.size()); + for (int i = 0; i < fake_ref_arguments.size(); i++) { + ref_fake_ptrs[i] = &fake_ref_arguments[i]; + } + + TF_ASSERT_OK_AND_ASSIGN( + std::vector ref_results, + HloTestBase::ExecuteReplicated( + std::move(ref_module), ref_fake_ptrs, kNumPartitions, &assn, + true /*run_hlo_passes*/, true /*use-threads*/)); + ASSERT_EQ(ref_results.size(), kNumPartitions); + ErrorSpec error_spec{1e-2, 1e-2}; + // Results should be the same between windowed einsum and non-windowed cases + for (int i = 0; i < kNumPartitions; i++) { + EXPECT_TRUE(LiteralTestUtil::Near(ref_results[i], results[i], error_spec)); + } +} } // namespace } // namespace xla From 934c0de9e0c19f8b93ce2f290764991150f5bb8a Mon Sep 17 00:00:00 2001 From: Mohammed Anany Date: Tue, 14 May 2024 01:53:21 -0700 Subject: [PATCH 097/478] [XLA:GPU] Refactor inputPrecision condition for Triton to use HloAnyOf without requiring adaptors PiperOrigin-RevId: 633484897 --- .../xla/xla/service/gpu/ir_emitter_triton.cc | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/third_party/xla/xla/service/gpu/ir_emitter_triton.cc b/third_party/xla/xla/service/gpu/ir_emitter_triton.cc index c190665f71e4e3..20f290e27ff18a 100644 --- a/third_party/xla/xla/service/gpu/ir_emitter_triton.cc +++ b/third_party/xla/xla/service/gpu/ir_emitter_triton.cc @@ -1942,18 +1942,15 @@ absl::Status EmitMatMul(mlir::OpBuilder builder, const HloInstruction* root = dot_instr->parent()->root_instruction(); TF_RET_CHECK(!root->shape().IsTuple()); - auto fusion_adaptor = HloFusionAdaptor::ForComputation(computation); - HloInstructionAdaptor instr_adaptor{*instr, fusion_adaptor.get()}; // TODO(b/320659359) Allow TF32 for 8-bit or less types with F32. - bool is_8_bit_or_less_dot_with_F32 = HloAnyOf( - instr_adaptor.GetOperands(), *fusion_adaptor, - [&](HloInstructionAdaptor node) { - if (node.opcode() != HloOpcode::kConvert) { + bool is_unsupported_bitwidth = + HloAnyOf({dot_instr}, [&](const HloInstruction* node) { + if (node->opcode() != HloOpcode::kConvert) { return false; } - Type in_type = - TritonType(builder, node.GetOperand(0).shape().element_type()); - Type out_type = TritonType(builder, node.shape().element_type()); + auto in_type = + TritonType(builder, node->operand(0)->shape().element_type()); + Type out_type = TritonType(builder, node->shape().element_type()); return in_type.getIntOrFloatBitWidth() <= 8 && out_type.isF32(); }); @@ -2168,7 +2165,7 @@ absl::Status EmitMatMul(mlir::OpBuilder builder, // lower precision than the output type. 
The change was introduced here: // https://github.com/openai/triton/commit/31b0c521427109a8eda609b58d756c380b21599a auto input_precision = - IsTf32Allowed(dot_instr) && !is_8_bit_or_less_dot_with_F32 + IsTf32Allowed(dot_instr) && !is_unsupported_bitwidth ? mt::InputPrecision::TF32 : mt::InputPrecision::IEEE; accumulator_next = From ca03c7863ceadfe2247e0d0a1f9b1e9e3854f4a7 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 May 2024 02:02:33 -0700 Subject: [PATCH 098/478] compat: Update forward compatibility horizon to 2024-05-14 PiperOrigin-RevId: 633487032 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 2a9869afa5dfb4..f372d9e16c9363 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -29,7 +29,7 @@ # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2024, 5, 13) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2024, 5, 14) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From 1581da5b8d475216d3f1938e8f001db2f03c508b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 May 2024 02:02:34 -0700 Subject: [PATCH 099/478] Update GraphDef version to 1862. PiperOrigin-RevId: 633487039 --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index aafe138460a6db..e6d5b27b16cb2c 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 1861 // Updated: 2024/5/13 +#define TF_GRAPH_DEF_VERSION 1862 // Updated: 2024/5/14 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From 360088f54467b4fd7491f7f9d9f32da59ca6c958 Mon Sep 17 00:00:00 2001 From: Sergey Kozub Date: Tue, 14 May 2024 02:52:44 -0700 Subject: [PATCH 100/478] Fix tile size for matmuls with 8-bit operands in autotuner The result of `GetExhaustiveTritonConfigs` is cached in the autotuner class instance, so should not depend on the input dot instruction. Therefore, removing the `has_8_bit_operand` parameter from the method signature. Instead, adjust the `block_k` attribute in the config normalization phase. Additionally, treat 8-bit floating point types in the same manner as integer types. PiperOrigin-RevId: 633506809 --- .../xla/service/gpu/gemm_fusion_autotuner.cc | 38 +++++++++---------- .../xla/service/gpu/gemm_fusion_autotuner.h | 3 +- 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/third_party/xla/xla/service/gpu/gemm_fusion_autotuner.cc b/third_party/xla/xla/service/gpu/gemm_fusion_autotuner.cc index d3d09a21c048f1..d5b34fb877add3 100644 --- a/third_party/xla/xla/service/gpu/gemm_fusion_autotuner.cc +++ b/third_party/xla/xla/service/gpu/gemm_fusion_autotuner.cc @@ -51,6 +51,7 @@ limitations under the License. 
#include "xla/hlo/ir/hlo_module.h" #include "xla/hlo/ir/hlo_opcode.h" #include "xla/hlo/utils/hlo_query.h" +#include "xla/primitive_util.h" #include "xla/service/algorithm_util.h" #include "xla/service/dump.h" #include "xla/service/executable.h" @@ -115,8 +116,6 @@ namespace { // Minimum tile size. constexpr int kMinTileSize = 16; -constexpr int kDimKMinTileSize = 32; -constexpr std::array kDimKBlockSizes = {32, 64, 128, 256, 512}; // Default tiling when autotuning is disabled. constexpr TritonGemmConfig kDefaultGemmTiling = {32, 32, 32, 1, 1, 4}; @@ -277,8 +276,7 @@ struct TileSizeLimit { int block_k = 0; }; -absl::StatusOr GetLimits(const HloDotInstruction& dot, - bool has_8_bit_operand) { +absl::StatusOr GetLimits(const HloDotInstruction& dot) { TF_ASSIGN_OR_RETURN(int64_t non_contracting_index_lhs, NonContractingDimensionIndex(dot, /*operand_number=*/0)); TF_ASSIGN_OR_RETURN(int64_t non_contracting_index_rhs, @@ -306,8 +304,7 @@ absl::StatusOr GetLimits(const HloDotInstruction& dot, return TileSizeLimit{ /*block_m=*/std::max(max_m, kMinTileSize), /*block_n=*/std::max(max_n, kMinTileSize), - /*block_k=*/ - std::max(max_k, has_8_bit_operand ? kDimKMinTileSize : kMinTileSize), + /*block_k=*/std::max(max_k, kMinTileSize), }; } @@ -587,18 +584,18 @@ GemmFusionAutotunerImpl::GenerateTritonConfigs(const HloDotInstruction& dot) { return false; } auto in_type = node->operand(0)->shape().element_type(); - return (in_type == PrimitiveType::S8) || (in_type == PrimitiveType::U8); + return primitive_util::BitWidth(in_type) == 8; }); std::vector result_configs; - TF_ASSIGN_OR_RETURN(TileSizeLimit limits, GetLimits(dot, has_8_bit_operand)); + TF_ASSIGN_OR_RETURN(TileSizeLimit limits, GetLimits(dot)); // Generate the list of configurations (once). if (triton_configs_.empty()) { triton_configs_ = !IsAutotuningEnabled() ? std::vector(1, kDefaultGemmTiling) : debug_options_.xla_gpu_exhaustive_tiling_search() - ? GetExhaustiveTritonConfigs(has_8_bit_operand) + ? GetExhaustiveTritonConfigs() : GetDefaultTritonConfigs(); } @@ -639,6 +636,15 @@ GemmFusionAutotunerImpl::GenerateTritonConfigs(const HloDotInstruction& dot) { } config.split_k = std::min(config.split_k, max_split_k); + // TODO(b/337839570): block_k = 16 is bugged in Triton for dots with 8-bit + // input. Setting minimum to 32 instead of 16 for these cases. + // TODO(b/337838200): Write the restriction on the minimum tile size to be + // generic. Currently we only handle the 8-bit case as this was the bug we + // ran into. + if (has_8_bit_operand && config.block_k == kMinTileSize) { + config.block_k *= 2; + } + // Hopper `wgmma` instruction requires at least 4 warps and 64 elements // for LHS tile height. if (is_hopper) { @@ -649,7 +655,8 @@ GemmFusionAutotunerImpl::GenerateTritonConfigs(const HloDotInstruction& dot) { // Sparse meta should have at least one element per thread. // Note: only 2:4 structured sparsity is currently supported. if (dot.sparse_operands()) { - config.block_k = std::max(config.block_k, 4 * kMinTileSize); + config.block_k = + std::max(config.block_k, kMinTileSize * (has_8_bit_operand ? 
4 : 2)); int meta_elements = config.block_m * config.block_k / 16; config.num_warps = std::min(config.num_warps, meta_elements / WarpSize()); @@ -909,19 +916,12 @@ absl::StatusOr> GemmFusionAutotunerImpl::Profile( } std::vector -GemmFusionAutotunerImpl::GetExhaustiveTritonConfigs( - bool has_8_bit_operand) const { +GemmFusionAutotunerImpl::GetExhaustiveTritonConfigs() const { std::vector configs; se::CudaComputeCapability cc = GetComputeCapability(); bool tune_ctas = debug_options_.xla_gpu_enable_triton_hopper() && cc.IsAtLeastHopper(); - // TODO(b/337839570): block_k = 16 is bugged in Triton for dots with 8-bit - // input. Setting minimum to 32 instead of 16 for these cases. - // TODO(b/337838200): Write the restriction on the minimum tile size to be - // generic. Currently we only handle the 8-bit case as this was the bug we - // ran into. - auto kDimBlockSizes = has_8_bit_operand ? kDimKBlockSizes : kBlockSizes; for (int num_stages : kNumStages) { // Volta doesn't support num_stages > 2. if (!cc.IsAtLeastAmpere() && num_stages > 2) { @@ -929,7 +929,7 @@ GemmFusionAutotunerImpl::GetExhaustiveTritonConfigs( } for (int tile_m : kBlockSizes) { for (int tile_n : kBlockSizes) { - for (int tile_k : kDimBlockSizes) { + for (int tile_k : kBlockSizes) { const int tile_lhs = tile_m * tile_k; const int tile_rhs = tile_k * tile_n; for (int num_warps : kNumWarps) { diff --git a/third_party/xla/xla/service/gpu/gemm_fusion_autotuner.h b/third_party/xla/xla/service/gpu/gemm_fusion_autotuner.h index 251826d79e7756..3f2a7a7d19a8e4 100644 --- a/third_party/xla/xla/service/gpu/gemm_fusion_autotuner.h +++ b/third_party/xla/xla/service/gpu/gemm_fusion_autotuner.h @@ -123,8 +123,7 @@ class GemmFusionAutotunerImpl { } std::vector GetDefaultTritonConfigs() const; - std::vector GetExhaustiveTritonConfigs( - bool has_8_bit_operand) const; + std::vector GetExhaustiveTritonConfigs() const; const AutotuneConfig config_; const DebugOptions debug_options_; From f55e2a43033fd921173bf67486ef8645574e6b35 Mon Sep 17 00:00:00 2001 From: Sergey Kozub Date: Tue, 14 May 2024 03:07:12 -0700 Subject: [PATCH 101/478] Fix sparse dot metadata loader Metadata loader was using incorrect warp assignment, which resulted in incorrect addresses with num_warps>4. This was previously missed, as the autotuner rarely selected such configs. 
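
For intuition, a minimal standalone sketch of the two warp-group formulas
(illustrative only: the {4, 2} warpsPerCTA below is a hypothetical 8-warp
tiling, not taken from the actual TritonGPU MMA layout):

  #include <cstdio>

  int main() {
    const int warps_per_cta[2] = {4, 2};  // hypothetical {M, N} warp tiling
    for (int warp_id = 0; warp_id < 8; ++warp_id) {
      const int old_group = warp_id / warps_per_cta[1];  // pre-fix: udiv
      const int new_group = warp_id % warps_per_cta[0];  // fixed (Hopper): urem
      std::printf("warp %d: old=%d new=%d\n", warp_id, old_group, new_group);
    }
  }

For a layout like warpsPerCTA = {4, 1} (num_warps = 4) the two expressions
coincide (both yield warp_id), which is consistent with the bug only
surfacing once more than four warps are in play.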
PiperOrigin-RevId: 633513110 --- .../sparse_dot_fixes_y24w19.patch | 18 ++++++++++++++++++ .../sparse_dot_fixes_y24w19.patch | 18 ++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/third_party/triton/xla_extensions/sparse_dot_fixes_y24w19.patch b/third_party/triton/xla_extensions/sparse_dot_fixes_y24w19.patch index 8ac91d153690fd..775ed317d1f9b9 100644 --- a/third_party/triton/xla_extensions/sparse_dot_fixes_y24w19.patch +++ b/third_party/triton/xla_extensions/sparse_dot_fixes_y24w19.patch @@ -11,3 +11,21 @@ diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect // get MMA encoding for the given number of warps auto retShapePerCTA = ttg::getShapePerCTA(oldRetType); +diff --git a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM/SharedToSparseDotOperand.cpp b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM/SharedToSparseDotOperand.cpp +--- a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM/SharedToSparseDotOperand.cpp ++++ b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM/SharedToSparseDotOperand.cpp +@@ -31,7 +31,13 @@ Value convertLayout( + // Calculate offset in the tile for the current thread. + Value threadsPerWarp = i32_val(kThreadsPerWarp); + Value warpId = udiv(thread, threadsPerWarp); +- Value warpGroupId = udiv(warpId, i32_val(warpsPerCTA[1])); ++ Value warpGroupId; ++ if (mmaLayout.isHopper()) { ++ warpGroupId = urem(warpId, i32_val(warpsPerCTA[0])); ++ } else { ++ assert(mmaLayout.isAmpere()); ++ warpGroupId = udiv(warpId, i32_val(warpsPerCTA[1])); ++ } + Value laneId = urem(thread, threadsPerWarp); + Value laneGroupId = udiv(laneId, i32_val(kThreadsInGroup)); + Value columnId = urem(laneId, i32_val(shapePerCTATile[1])); diff --git a/third_party/xla/third_party/triton/xla_extensions/sparse_dot_fixes_y24w19.patch b/third_party/xla/third_party/triton/xla_extensions/sparse_dot_fixes_y24w19.patch index 8ac91d153690fd..775ed317d1f9b9 100644 --- a/third_party/xla/third_party/triton/xla_extensions/sparse_dot_fixes_y24w19.patch +++ b/third_party/xla/third_party/triton/xla_extensions/sparse_dot_fixes_y24w19.patch @@ -11,3 +11,21 @@ diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect // get MMA encoding for the given number of warps auto retShapePerCTA = ttg::getShapePerCTA(oldRetType); +diff --git a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM/SharedToSparseDotOperand.cpp b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM/SharedToSparseDotOperand.cpp +--- a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM/SharedToSparseDotOperand.cpp ++++ b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM/SharedToSparseDotOperand.cpp +@@ -31,7 +31,13 @@ Value convertLayout( + // Calculate offset in the tile for the current thread. 
+ Value threadsPerWarp = i32_val(kThreadsPerWarp); + Value warpId = udiv(thread, threadsPerWarp); +- Value warpGroupId = udiv(warpId, i32_val(warpsPerCTA[1])); ++ Value warpGroupId; ++ if (mmaLayout.isHopper()) { ++ warpGroupId = urem(warpId, i32_val(warpsPerCTA[0])); ++ } else { ++ assert(mmaLayout.isAmpere()); ++ warpGroupId = udiv(warpId, i32_val(warpsPerCTA[1])); ++ } + Value laneId = urem(thread, threadsPerWarp); + Value laneGroupId = udiv(laneId, i32_val(kThreadsInGroup)); + Value columnId = urem(laneId, i32_val(shapePerCTATile[1])); From 45c2071da17491a46469e894561ea823979b2029 Mon Sep 17 00:00:00 2001 From: Mohammed Anany Date: Tue, 14 May 2024 03:08:23 -0700 Subject: [PATCH 102/478] [Triton] Limit visibility of original bit-width computation during computing mma and dot_operand layouts to not propagate through predicates. Predicates are not supported for these layouts during lowering from TritonGPU to LLVM and cause crashes. PiperOrigin-RevId: 633513652 --- .../triton/temporary/mma_limit_pred.patch | 23 +++++++++++++++++++ third_party/triton/temporary/series.bzl | 1 + .../triton/temporary/mma_limit_pred.patch | 23 +++++++++++++++++++ .../third_party/triton/temporary/series.bzl | 1 + .../xla/service/gpu/ir_emitter_triton_test.cc | 23 +++++++++++++++++++ 5 files changed, 71 insertions(+) create mode 100644 third_party/triton/temporary/mma_limit_pred.patch create mode 100644 third_party/xla/third_party/triton/temporary/mma_limit_pred.patch diff --git a/third_party/triton/temporary/mma_limit_pred.patch b/third_party/triton/temporary/mma_limit_pred.patch new file mode 100644 index 00000000000000..cde501d0a9b6fd --- /dev/null +++ b/third_party/triton/temporary/mma_limit_pred.patch @@ -0,0 +1,23 @@ +// This patch fixes issues that broke internal benchmarks as well as individual +// fusions reported. It should be moved to our internal patches in the next +// integration. + +diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp +--- a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp ++++ b/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp +@@ -133,6 +133,15 @@ class BlockedToMMA : public mlir::Rewrit + mutable llvm::DenseMap dotOpInstNs; + + static bool bwdFilter(Operation *op) { ++ // Dot operand layout assignment to Predicates are not currently supported ++ // during lowering from TritonGPU to LLVM in Triton for MMA cases. This ++ // condition limits visibility of the original bit-width so that predicate ++ // are not considered, hence, kwidth can never be = 32. ++ if (isa(op)) { ++ Type srcType = getElementTypeOrSelf(op->getOperand(0)); ++ if (srcType.isInteger(1)) ++ return false; ++ } + return op->getNumOperands() == 1 && + (isa(op) || + isPureUnaryInlineAsm(op) || diff --git a/third_party/triton/temporary/series.bzl b/third_party/triton/temporary/series.bzl index b6002f83e1d7bb..70313c9b436d2a 100644 --- a/third_party/triton/temporary/series.bzl +++ b/third_party/triton/temporary/series.bzl @@ -8,4 +8,5 @@ internal patch during the next triton integration process. 
temporary_patch_list = [ "//third_party/triton/temporary:pipelining.patch", "//third_party/triton/temporary:support_ceil_op.patch", + "//third_party/triton/temporary:mma_limit_pred.patch", ] diff --git a/third_party/xla/third_party/triton/temporary/mma_limit_pred.patch b/third_party/xla/third_party/triton/temporary/mma_limit_pred.patch new file mode 100644 index 00000000000000..cde501d0a9b6fd --- /dev/null +++ b/third_party/xla/third_party/triton/temporary/mma_limit_pred.patch @@ -0,0 +1,23 @@ +// This patch fixes issues that broke internal benchmarks as well as individual +// fusions reported. It should be moved to our internal patches in the next +// integration. + +diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp +--- a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp ++++ b/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp +@@ -133,6 +133,15 @@ class BlockedToMMA : public mlir::Rewrit + mutable llvm::DenseMap dotOpInstNs; + + static bool bwdFilter(Operation *op) { ++ // Dot operand layout assignment to Predicates are not currently supported ++ // during lowering from TritonGPU to LLVM in Triton for MMA cases. This ++ // condition limits visibility of the original bit-width so that predicate ++ // are not considered, hence, kwidth can never be = 32. ++ if (isa(op)) { ++ Type srcType = getElementTypeOrSelf(op->getOperand(0)); ++ if (srcType.isInteger(1)) ++ return false; ++ } + return op->getNumOperands() == 1 && + (isa(op) || + isPureUnaryInlineAsm(op) || diff --git a/third_party/xla/third_party/triton/temporary/series.bzl b/third_party/xla/third_party/triton/temporary/series.bzl index b6002f83e1d7bb..70313c9b436d2a 100644 --- a/third_party/xla/third_party/triton/temporary/series.bzl +++ b/third_party/xla/third_party/triton/temporary/series.bzl @@ -8,4 +8,5 @@ internal patch during the next triton integration process. 
temporary_patch_list = [
     "//third_party/triton/temporary:pipelining.patch",
     "//third_party/triton/temporary:support_ceil_op.patch",
+    "//third_party/triton/temporary:mma_limit_pred.patch",
 ]
diff --git a/third_party/xla/xla/service/gpu/ir_emitter_triton_test.cc b/third_party/xla/xla/service/gpu/ir_emitter_triton_test.cc
index 8ebf9e94386885..111f8e276e1de8 100644
--- a/third_party/xla/xla/service/gpu/ir_emitter_triton_test.cc
+++ b/third_party/xla/xla/service/gpu/ir_emitter_triton_test.cc
@@ -1375,6 +1375,29 @@ ENTRY e {
   EXPECT_EQ(paths.size(), 1);
 }
 
+TEST_F(TritonGemmTest, DotWithPredFromCompareProducesCorrectResult) {
+  const std::string hlo_text = R"(
+triton_dot {
+  parameter_0 = s32[4,128]{1,0} parameter(0)
+  broadcast.255 = s32[4,128,64]{2,1,0} broadcast(parameter_0), dimensions={0,1}
+  parameter_1 = s32[4,128,64]{2,1,0} parameter(1)
+  compare.39 = pred[4,128,64]{2,1,0} compare(broadcast.255, parameter_1), direction=EQ
+  bitcast.1097 = pred[512,64]{1,0} reshape(compare.39)
+  convert.229 = bf16[512,64]{1,0} convert(bitcast.1097)
+  parameter_2 = bf16[64,256]{0,1} parameter(2)
+  ROOT dot.21 = bf16[512,256]{1,0} dot(convert.229, parameter_2),
+    lhs_contracting_dims={1}, rhs_contracting_dims={0}
+}
+ENTRY main {
+  p0 = s32[4,128]{1,0} parameter(0)
+  p1 = s32[4,128,64]{2,1,0} parameter(1)
+  p2 = bf16[64,256]{0,1} parameter(2)
+  ROOT gemm_fusion_dot.0 = bf16[512,256]{1,0} fusion(p0, p1, p2), kind=kCustom, calls=triton_dot, backend_config={"fusion_backend_config":{"kind":"__triton_gemm","triton_gemm_config":{"block_m":"64","block_n":"128","block_k":"32","split_k":"1","num_stages":"4","num_warps":"4","num_ctas":"1"}}}
+})";
+
+  EXPECT_TRUE(RunAndCompare(hlo_text, ErrorSpec{/*aabs=*/1e-3, /*arel=*/1e-3}));
+}
+
 TEST_F(TritonGemmTest, UseTensorCoresForF32OnAmpere) {
   const std::string kHloText = R"(
 triton_gemm_r {

From 3734929103b83e6b5a93a77a58d361b387d1feaf Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower"
Date: Tue, 14 May 2024 03:17:43 -0700
Subject: [PATCH 103/478] Update ops-related pbtxt files.

PiperOrigin-RevId: 633517388
---
 tensorflow/core/ops/ops.pbtxt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 5019fe2a5b2fad..0556051577f5e4 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -1,4 +1,4 @@
-go/debugproto
+go/debugstr
 op {
   name: "Abort"
   attr {

From d6bfe26897528425030ff0cd9b394159d0100504 Mon Sep 17 00:00:00 2001
From: Leo Heinsaar
Date: Tue, 14 May 2024 03:30:39 -0700
Subject: [PATCH 104/478] Move `BuildAttributesMap()` to a common place

So that both CPU and GPU code can reuse the same implementation. At the
moment each uses its own, identical version.
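
A sketch of the intended usage after the move (the wrapper name
ParseFfiAttributes is invented for this illustration; its body is condensed
from the GPU call sites updated below):

  #include <string>

  #include "absl/status/status.h"
  #include "absl/status/statusor.h"
  #include "mlir/AsmParser/AsmParser.h"
  #include "mlir/IR/BuiltinAttributes.h"
  #include "mlir/IR/MLIRContext.h"
  #include "xla/ffi/attribute_map.h"
  #include "xla/ffi/call_frame.h"

  // Parses a backend config string into an MLIR dictionary attribute and
  // converts it to FFI call-frame attributes with the now-shared helper.
  absl::StatusOr<xla::ffi::CallFrameBuilder::FlatAttributesMap>
  ParseFfiAttributes(const std::string& backend_config,
                     mlir::MLIRContext* mlir_context) {
    mlir::Attribute attr = mlir::parseAttribute(backend_config, mlir_context);
    if (auto dict = mlir::dyn_cast_or_null<mlir::DictionaryAttr>(attr)) {
      return xla::ffi::BuildAttributesMap(dict);
    }
    return absl::InternalError(
        "Expected a string parsable into a dictionary attribute.");
  }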
PiperOrigin-RevId: 633522046 --- third_party/xla/xla/ffi/BUILD | 17 +++ third_party/xla/xla/ffi/attribute_map.cc | 130 ++++++++++++++++++ third_party/xla/xla/ffi/attribute_map.h | 32 +++++ third_party/xla/xla/service/cpu/BUILD | 1 + .../service/cpu/runtime_handle_ffi_call.cc | 92 +------------ third_party/xla/xla/service/gpu/BUILD | 1 + third_party/xla/xla/service/gpu/fusions/BUILD | 1 + .../xla/xla/service/gpu/fusions/custom.cc | 3 +- .../xla/service/gpu/ir_emitter_unnested.cc | 3 +- .../service/gpu/runtime/custom_call_thunk.cc | 94 ------------- .../service/gpu/runtime/custom_call_thunk.h | 5 - third_party/xla/xla/tests/BUILD | 1 + 12 files changed, 189 insertions(+), 191 deletions(-) create mode 100644 third_party/xla/xla/ffi/attribute_map.cc create mode 100644 third_party/xla/xla/ffi/attribute_map.h diff --git a/third_party/xla/xla/ffi/BUILD b/third_party/xla/xla/ffi/BUILD index c53835bf83a88d..4da1564a334ad1 100644 --- a/third_party/xla/xla/ffi/BUILD +++ b/third_party/xla/xla/ffi/BUILD @@ -102,6 +102,23 @@ cc_library( ], ) +cc_library( + name = "attribute_map", + srcs = ["attribute_map.cc"], + hdrs = ["attribute_map.h"], + deps = [ + ":call_frame", + "//xla:xla_data_proto_cc_impl", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/strings", + "@llvm-project//llvm:Support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Support", + "@local_tsl//tsl/platform:errors", + ], +) + xla_cc_test( name = "ffi_test", srcs = ["ffi_test.cc"], diff --git a/third_party/xla/xla/ffi/attribute_map.cc b/third_party/xla/xla/ffi/attribute_map.cc new file mode 100644 index 00000000000000..9443a8c010d542 --- /dev/null +++ b/third_party/xla/xla/ffi/attribute_map.cc @@ -0,0 +1,130 @@ +/* Copyright 2024 The OpenXLA Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "xla/ffi/attribute_map.h" + +#include +#include + +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "absl/strings/str_cat.h" +#include "llvm/ADT/TypeSwitch.h" +#include "mlir/IR/Attributes.h" // from @llvm-project +#include "mlir/IR/BuiltinAttributes.h" // from @llvm-project +#include "mlir/Support/LLVM.h" // from @llvm-project +#include "xla/ffi/call_frame.h" +#include "xla/xla_data.pb.h" +#include "tsl/platform/errors.h" + +using FlatAttribute = xla::ffi::CallFrameBuilder::FlatAttribute; +using FlatAttributesMap = xla::ffi::CallFrameBuilder::FlatAttributesMap; + +namespace xla::ffi { + +absl::StatusOr BuildAttributesMap( + mlir::DictionaryAttr dict) { + FlatAttributesMap attributes; + for (auto& kv : dict) { + std::string_view name = kv.getName().strref(); + + auto boolean = [&](mlir::BoolAttr boolean) { + attributes[name] = static_cast(boolean.getValue()); + return absl::OkStatus(); + }; + + auto integer = [&](mlir::IntegerAttr integer) { + switch (integer.getType().getIntOrFloatBitWidth()) { + case 1: + attributes[name] = static_cast(integer.getInt()); + return absl::OkStatus(); + case 8: + attributes[name] = static_cast(integer.getInt()); + return absl::OkStatus(); + case 16: + attributes[name] = static_cast(integer.getInt()); + return absl::OkStatus(); + case 32: + attributes[name] = static_cast(integer.getInt()); + return absl::OkStatus(); + case 64: + attributes[name] = static_cast(integer.getInt()); + return absl::OkStatus(); + default: + return absl::InvalidArgumentError(absl::StrCat( + "Unsupported integer attribute bit width for attribute: ", name)); + } + }; + + auto fp = [&](mlir::FloatAttr fp) { + switch (fp.getType().getIntOrFloatBitWidth()) { + case 32: + attributes[name] = static_cast(fp.getValue().convertToFloat()); + return absl::OkStatus(); + case 64: + attributes[name] = + static_cast(fp.getValue().convertToDouble()); + return absl::OkStatus(); + default: + return absl::InvalidArgumentError(absl::StrCat( + "Unsupported float attribute bit width for attribute: ", name)); + } + }; + + auto arr = [&](mlir::DenseArrayAttr arr) { + if (auto dense = mlir::dyn_cast(arr)) { + attributes[name] = dense.asArrayRef().vec(); + return absl::OkStatus(); + } else if (auto dense = mlir::dyn_cast(arr)) { + attributes[name] = dense.asArrayRef().vec(); + return absl::OkStatus(); + } else if (auto dense = mlir::dyn_cast(arr)) { + attributes[name] = dense.asArrayRef().vec(); + return absl::OkStatus(); + } else if (auto dense = mlir::dyn_cast(arr)) { + attributes[name] = dense.asArrayRef().vec(); + return absl::OkStatus(); + } else if (auto dense = mlir::dyn_cast(arr)) { + attributes[name] = dense.asArrayRef().vec(); + return absl::OkStatus(); + } else if (auto dense = mlir::dyn_cast(arr)) { + attributes[name] = dense.asArrayRef().vec(); + return absl::OkStatus(); + } else { + return absl::InvalidArgumentError(absl::StrCat( + "Unsupported array element type for attribute: ", name)); + } + }; + + auto str = [&](mlir::StringAttr str) { + attributes[name] = str.getValue().str(); + return absl::OkStatus(); + }; + + TF_RETURN_IF_ERROR( + llvm::TypeSwitch(kv.getValue()) + .Case(boolean) + .Case(integer) + .Case(fp) + .Case(arr) + .Case(str) + .Default([&](mlir::Attribute) { + return absl::InvalidArgumentError(absl::StrCat( + "Unsupported attribute type for attribute: ", name)); + })); + } + return attributes; +} +} // namespace xla::ffi diff --git 
a/third_party/xla/xla/ffi/attribute_map.h b/third_party/xla/xla/ffi/attribute_map.h new file mode 100644 index 00000000000000..d6c37b31c5522b --- /dev/null +++ b/third_party/xla/xla/ffi/attribute_map.h @@ -0,0 +1,32 @@ +/* Copyright 2024 The OpenXLA Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef XLA_FFI_ATTRIBUTE_MAP_H_ +#define XLA_FFI_ATTRIBUTE_MAP_H_ + +#include "absl/status/statusor.h" +#include "mlir/IR/BuiltinAttributes.h" // from @llvm-project +#include "xla/ffi/call_frame.h" + +namespace xla::ffi { + +// Converts MLIR dictionary attribute attached to a custom call operation to a +// custom call handler attributes that are forwarded to the FFI handler. +absl::StatusOr BuildAttributesMap( + mlir::DictionaryAttr dict); + +} // namespace xla::ffi + +#endif // XLA_FFI_ATTRIBUTE_MAP_H_ diff --git a/third_party/xla/xla/service/cpu/BUILD b/third_party/xla/xla/service/cpu/BUILD index 125af6dbce9170..1d0defbdcced10 100644 --- a/third_party/xla/xla/service/cpu/BUILD +++ b/third_party/xla/xla/service/cpu/BUILD @@ -1184,6 +1184,7 @@ cc_library( deps = [ "//xla:shape_util", "//xla:xla_data_proto_cc", + "//xla/ffi:attribute_map", "//xla/ffi:call_frame", "//xla/ffi:ffi_api", "//xla/service:custom_call_status_public_headers", diff --git a/third_party/xla/xla/service/cpu/runtime_handle_ffi_call.cc b/third_party/xla/xla/service/cpu/runtime_handle_ffi_call.cc index 7a7963593f592a..7d3271db600634 100644 --- a/third_party/xla/xla/service/cpu/runtime_handle_ffi_call.cc +++ b/third_party/xla/xla/service/cpu/runtime_handle_ffi_call.cc @@ -33,6 +33,7 @@ limitations under the License. #include "mlir/IR/Builders.h" // from @llvm-project #include "mlir/IR/BuiltinAttributes.h" // from @llvm-project #include "mlir/IR/MLIRContext.h" // from @llvm-project +#include "xla/ffi/attribute_map.h" #include "xla/ffi/call_frame.h" #include "xla/ffi/ffi_api.h" #include "xla/primitive_util.h" @@ -46,95 +47,6 @@ namespace ffi = xla::ffi; namespace { -using Attribute = ffi::CallFrameBuilder::FlatAttribute; -using AttributesMap = ffi::CallFrameBuilder::FlatAttributesMap; - -// TODO(heinsaar): This BuildAttributesMap() is originally an identical -// copy-paste of the same function in custom_call_thunk.cc -// May make sense to have one in a common place & reuse. 
-absl::StatusOr BuildAttributesMap(mlir::DictionaryAttr dict) { - AttributesMap attributes; - for (auto& kv : dict) { - std::string_view name = kv.getName().strref(); - - auto boolean = [&](mlir::BoolAttr boolean) { - attributes[name] = static_cast(boolean.getValue()); - return absl::OkStatus(); - }; - - auto integer = [&](mlir::IntegerAttr integer) { - const bool is_unsigned = integer.getType().isUnsignedInteger(); - if (is_unsigned) { - switch (integer.getType().getIntOrFloatBitWidth()) { - case 8: - attributes[name] = static_cast(integer.getUInt()); - return absl::OkStatus(); - case 16: - attributes[name] = static_cast(integer.getUInt()); - return absl::OkStatus(); - case 32: - attributes[name] = static_cast(integer.getUInt()); - return absl::OkStatus(); - case 64: - attributes[name] = static_cast(integer.getUInt()); - return absl::OkStatus(); - default: - return absl::InvalidArgumentError(absl::StrCat( - "Unsupported integer attribute bit width for attribute: ", - name)); - } - } else { - switch (integer.getType().getIntOrFloatBitWidth()) { - case 8: - attributes[name] = static_cast(integer.getInt()); - return absl::OkStatus(); - case 16: - attributes[name] = static_cast(integer.getInt()); - return absl::OkStatus(); - case 32: - attributes[name] = static_cast(integer.getInt()); - return absl::OkStatus(); - case 64: - attributes[name] = static_cast(integer.getInt()); - return absl::OkStatus(); - default: - return absl::InvalidArgumentError(absl::StrCat( - "Unsupported integer attribute bit width for attribute: ", - name)); - } - } - }; - - auto fp = [&](mlir::FloatAttr fp) { - switch (fp.getType().getIntOrFloatBitWidth()) { - case 32: - attributes[name] = static_cast(fp.getValue().convertToFloat()); - return absl::OkStatus(); - default: - return absl::InvalidArgumentError(absl::StrCat( - "Unsupported float attribute bit width for attribute: ", name)); - } - }; - - auto str = [&](mlir::StringAttr str) { - attributes[name] = str.getValue().str(); - return absl::OkStatus(); - }; - - TF_RETURN_IF_ERROR( - llvm::TypeSwitch(kv.getValue()) - .Case(boolean) - .Case(integer) - .Case(fp) - .Case(str) - .Default([&](mlir::Attribute) { - return absl::InvalidArgumentError(absl::StrCat( - "Unsupported attribute type for attribute: ", name)); - })); - } - return attributes; -} - absl::Span DecodeDims(int64_t* encoded_dims_data) { // Annotate memory coming from jit compiled function as initialized to // suppress false positives from msan sanitizer. @@ -230,7 +142,7 @@ inline absl::Status BuildAndCallFfi( // and build an MLIR compatible map of attributes out of it. mlir::Attribute attr = mlir::parseAttribute(backend_config, &mlir_context); if (auto dict = attr.dyn_cast_or_null()) { - TF_ASSIGN_OR_RETURN(attributes, BuildAttributesMap(dict)); + TF_ASSIGN_OR_RETURN(attributes, xla::ffi::BuildAttributesMap(dict)); } else { return absl::InternalError( "Unsupported backend config. 
Expected a string parsable into " diff --git a/third_party/xla/xla/service/gpu/BUILD b/third_party/xla/xla/service/gpu/BUILD index 5faf098331bb14..e665640e8ce5eb 100644 --- a/third_party/xla/xla/service/gpu/BUILD +++ b/third_party/xla/xla/service/gpu/BUILD @@ -347,6 +347,7 @@ cc_library( "//xla:statusor", "//xla:util", "//xla:xla_data_proto_cc", + "//xla/ffi:attribute_map", "//xla/ffi:ffi_api", "//xla/ffi/api:c_api", "//xla/hlo/ir:hlo", diff --git a/third_party/xla/xla/service/gpu/fusions/BUILD b/third_party/xla/xla/service/gpu/fusions/BUILD index 02752d4bfffe04..2e0a1202157486 100644 --- a/third_party/xla/xla/service/gpu/fusions/BUILD +++ b/third_party/xla/xla/service/gpu/fusions/BUILD @@ -126,6 +126,7 @@ cc_library( "//xla:status", "//xla:statusor", "//xla:util", + "//xla/ffi:attribute_map", "//xla/ffi:ffi_api", "//xla/hlo/ir:hlo", "//xla/service:buffer_assignment", diff --git a/third_party/xla/xla/service/gpu/fusions/custom.cc b/third_party/xla/xla/service/gpu/fusions/custom.cc index 5d5cc5d3898e08..e6dc11a46cb0be 100644 --- a/third_party/xla/xla/service/gpu/fusions/custom.cc +++ b/third_party/xla/xla/service/gpu/fusions/custom.cc @@ -34,6 +34,7 @@ limitations under the License. #include "mlir/IR/Attributes.h" // from @llvm-project #include "mlir/IR/BuiltinAttributes.h" // from @llvm-project #include "mlir/Support/LLVM.h" // from @llvm-project +#include "xla/ffi/attribute_map.h" #include "xla/ffi/ffi_api.h" #include "xla/hlo/ir/hlo_casting_utils.h" #include "xla/hlo/ir/hlo_instruction.h" @@ -563,7 +564,7 @@ absl::StatusOr EmitCustomCall( mlir::Attribute attr = mlir::parseAttribute( backend_config_str, ir_emitter_context.mlir_context()); if (auto dict = mlir::dyn_cast_or_null(attr)) { - TF_ASSIGN_OR_RETURN(attributes, BuildAttributesMap(dict)); + TF_ASSIGN_OR_RETURN(attributes, xla::ffi::BuildAttributesMap(dict)); break; } return absl::InternalError( diff --git a/third_party/xla/xla/service/gpu/ir_emitter_unnested.cc b/third_party/xla/xla/service/gpu/ir_emitter_unnested.cc index 308a7c479822f5..85e39381bac1d9 100644 --- a/third_party/xla/xla/service/gpu/ir_emitter_unnested.cc +++ b/third_party/xla/xla/service/gpu/ir_emitter_unnested.cc @@ -72,6 +72,7 @@ limitations under the License. #include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h" // from @llvm-project #include "mlir/Target/LLVMIR/Export.h" // from @llvm-project #include "xla/ffi/api/c_api.h" +#include "xla/ffi/attribute_map.h" #include "xla/ffi/ffi_api.h" #include "xla/hlo/ir/hlo_casting_utils.h" #include "xla/hlo/ir/hlo_computation.h" @@ -1405,7 +1406,7 @@ absl::Status IrEmitterUnnested::EmitCustomCallThunk( mlir::Attribute attr = mlir::parseAttribute( backend_config_str, ir_emitter_context_->mlir_context()); if (auto dict = mlir::dyn_cast_or_null(attr)) { - TF_ASSIGN_OR_RETURN(attributes, BuildAttributesMap(dict)); + TF_ASSIGN_OR_RETURN(attributes, xla::ffi::BuildAttributesMap(dict)); break; } return absl::InternalError( diff --git a/third_party/xla/xla/service/gpu/runtime/custom_call_thunk.cc b/third_party/xla/xla/service/gpu/runtime/custom_call_thunk.cc index 9c25e984252519..b110384ce35563 100644 --- a/third_party/xla/xla/service/gpu/runtime/custom_call_thunk.cc +++ b/third_party/xla/xla/service/gpu/runtime/custom_call_thunk.cc @@ -167,99 +167,5 @@ absl::Status CustomCallThunk::ExecuteOnStream(const ExecuteParams& params) { return handler_ ? 
ExecuteFfiHandler(params) : ExecuteCustomCall(params); } -absl::StatusOr BuildAttributesMap( - mlir::DictionaryAttr dict) { - CustomCallThunk::AttributesMap attributes; - for (auto& kv : dict) { - std::string_view name = kv.getName().strref(); - - auto boolean = [&](mlir::BoolAttr boolean) { - attributes[name] = static_cast(boolean.getValue()); - return absl::OkStatus(); - }; - - auto integer = [&](mlir::IntegerAttr integer) { - switch (integer.getType().getIntOrFloatBitWidth()) { - case 1: - attributes[name] = static_cast(integer.getInt()); - return absl::OkStatus(); - case 8: - attributes[name] = static_cast(integer.getInt()); - return absl::OkStatus(); - case 16: - attributes[name] = static_cast(integer.getInt()); - return absl::OkStatus(); - case 32: - attributes[name] = static_cast(integer.getInt()); - return absl::OkStatus(); - case 64: - attributes[name] = static_cast(integer.getInt()); - return absl::OkStatus(); - default: - return absl::InvalidArgumentError(absl::StrCat( - "Unsupported integer attribute bit width for attribute: ", name)); - } - }; - - auto fp = [&](mlir::FloatAttr fp) { - switch (fp.getType().getIntOrFloatBitWidth()) { - case 32: - attributes[name] = static_cast(fp.getValue().convertToFloat()); - return absl::OkStatus(); - case 64: - attributes[name] = - static_cast(fp.getValue().convertToDouble()); - return absl::OkStatus(); - default: - return absl::InvalidArgumentError(absl::StrCat( - "Unsupported float attribute bit width for attribute: ", name)); - } - }; - - auto arr = [&](mlir::DenseArrayAttr arr) { - if (auto dense = mlir::dyn_cast(arr)) { - attributes[name] = dense.asArrayRef().vec(); - return absl::OkStatus(); - } else if (auto dense = mlir::dyn_cast(arr)) { - attributes[name] = dense.asArrayRef().vec(); - return absl::OkStatus(); - } else if (auto dense = mlir::dyn_cast(arr)) { - attributes[name] = dense.asArrayRef().vec(); - return absl::OkStatus(); - } else if (auto dense = mlir::dyn_cast(arr)) { - attributes[name] = dense.asArrayRef().vec(); - return absl::OkStatus(); - } else if (auto dense = mlir::dyn_cast(arr)) { - attributes[name] = dense.asArrayRef().vec(); - return absl::OkStatus(); - } else if (auto dense = mlir::dyn_cast(arr)) { - attributes[name] = dense.asArrayRef().vec(); - return absl::OkStatus(); - } else { - return absl::InvalidArgumentError(absl::StrCat( - "Unsupported array element type for attribute: ", name)); - } - }; - - auto str = [&](mlir::StringAttr str) { - attributes[name] = str.getValue().str(); - return absl::OkStatus(); - }; - - TF_RETURN_IF_ERROR( - llvm::TypeSwitch(kv.getValue()) - .Case(boolean) - .Case(integer) - .Case(fp) - .Case(arr) - .Case(str) - .Default([&](mlir::Attribute) { - return absl::InvalidArgumentError(absl::StrCat( - "Unsupported attribute type for attribute: ", name)); - })); - } - return attributes; -} - } // namespace gpu } // namespace xla diff --git a/third_party/xla/xla/service/gpu/runtime/custom_call_thunk.h b/third_party/xla/xla/service/gpu/runtime/custom_call_thunk.h index 02679d2e0d21ff..2d797ecea01a6c 100644 --- a/third_party/xla/xla/service/gpu/runtime/custom_call_thunk.h +++ b/third_party/xla/xla/service/gpu/runtime/custom_call_thunk.h @@ -120,11 +120,6 @@ class CustomCallThunk : public Thunk { const HloComputation* called_computation_ = nullptr; }; -// Converts MLIR dictionary attribute attached to a custom call operation to a -// custom call thunk attributes that are forwarded to the FFI handler. 
-absl::StatusOr BuildAttributesMap( - mlir::DictionaryAttr dict); - } // namespace gpu } // namespace xla diff --git a/third_party/xla/xla/tests/BUILD b/third_party/xla/xla/tests/BUILD index 2bffc0a1316e5f..58600c91d25852 100644 --- a/third_party/xla/xla/tests/BUILD +++ b/third_party/xla/xla/tests/BUILD @@ -1783,6 +1783,7 @@ xla_test( "//xla/client:xla_builder", "//xla/client/lib:constants", "//xla/ffi", + "//xla/ffi:attribute_map", "//xla/ffi:ffi_api", "//xla/hlo/ir:hlo", "//xla/service:custom_call_status", From b8629e1a9a0df24df2b9ce7f51a594ffd902519a Mon Sep 17 00:00:00 2001 From: Johannes Reifferscheid Date: Tue, 14 May 2024 04:04:35 -0700 Subject: [PATCH 105/478] Canonicalize affine expression trees in simplifier. I don't know where the current non-determinism comes from, but this should fix it. The particular order is not important, but it being canonical is. These expressions are used for codegen in the new emitters, so the simplification must be deterministic. PiperOrigin-RevId: 633531399 --- .../gpu/fusions/concatenate_mlir_test.cc | 2 +- .../service/gpu/fusions/concatenate_test.cc | 2 +- .../service/gpu/fusions/input_slices_test.cc | 4 +- .../xla/service/gpu/fusions/loop_mlir_test.cc | 4 +- .../xla/xla/service/gpu/fusions/loop_test.cc | 4 +- .../gpu/fusions/reduction_base_test.cc | 2 +- .../service/gpu/fusions/scatter_mlir_test.cc | 2 +- .../xla/service/gpu/fusions/scatter_test.cc | 2 +- .../gpu/fusions/transpose_mlir_test.cc | 12 +-- .../xla/service/gpu/fusions/transpose_test.cc | 4 +- .../gpu/model/indexing_analysis_test.cc | 8 +- .../xla/xla/service/gpu/model/indexing_map.cc | 70 ++++++++++++++++- .../service/gpu/model/indexing_map_test.cc | 6 +- .../service/gpu/model/indexing_test_utils.cc | 77 +++---------------- 14 files changed, 107 insertions(+), 92 deletions(-) diff --git a/third_party/xla/xla/service/gpu/fusions/concatenate_mlir_test.cc b/third_party/xla/xla/service/gpu/fusions/concatenate_mlir_test.cc index eea090ed80474a..781ebf563f66ea 100644 --- a/third_party/xla/xla/service/gpu/fusions/concatenate_mlir_test.cc +++ b/third_party/xla/xla/service/gpu/fusions/concatenate_mlir_test.cc @@ -57,7 +57,7 @@ TEST_F(MlirConcatenateFusionTest, ThreadIdIndexing) { constexpr auto kIndexing = R"( (th_x, th_y, th_z, bl_x, bl_y, bl_z)[chunk_id, unroll_id] -> ( - (th_x + bl_x * 128) mod 400) + (bl_x * 128 + th_x) mod 400) domain: th_x in [0, 127] th_y in [0, 0] diff --git a/third_party/xla/xla/service/gpu/fusions/concatenate_test.cc b/third_party/xla/xla/service/gpu/fusions/concatenate_test.cc index b617fd6513d107..4ac8ca3c665497 100644 --- a/third_party/xla/xla/service/gpu/fusions/concatenate_test.cc +++ b/third_party/xla/xla/service/gpu/fusions/concatenate_test.cc @@ -80,7 +80,7 @@ TEST_F(ConcatenateTest, ThreadIndexing) { constexpr auto kIndexing = R"( (th_x, th_y, th_z, bl_x, bl_y, bl_z)[chunk_id, unroll_id] -> ( - (th_x + bl_x * 128) mod 400) + (bl_x * 128 + th_x) mod 400) domain: th_x in [0, 127] th_y in [0, 0] diff --git a/third_party/xla/xla/service/gpu/fusions/input_slices_test.cc b/third_party/xla/xla/service/gpu/fusions/input_slices_test.cc index 094bbfac7a27a9..9c10beb8be1e12 100644 --- a/third_party/xla/xla/service/gpu/fusions/input_slices_test.cc +++ b/third_party/xla/xla/service/gpu/fusions/input_slices_test.cc @@ -80,8 +80,8 @@ TEST_F(InputSlicesTest, ThreadIndexing) { EXPECT_THAT(thread_id_to_output_indexing->ToString(printer_), MatchIndexingString(R"( (th_x, th_y, th_z, bl_x, bl_y, bl_z)[chunk_id, unroll_id] -> (0, - ((th_x + bl_x * 128) floordiv 3) mod 2, - (th_x + 
bl_x * 128) mod 3, + ((bl_x * 128 + th_x) floordiv 3) mod 2, + (bl_x * 128 + th_x) mod 3, ((bl_x * 64 + th_x floordiv 2) floordiv 3) mod 5) domain: th_x in [0, 127] diff --git a/third_party/xla/xla/service/gpu/fusions/loop_mlir_test.cc b/third_party/xla/xla/service/gpu/fusions/loop_mlir_test.cc index 4a283349779fdc..bd319d9d86d60d 100644 --- a/third_party/xla/xla/service/gpu/fusions/loop_mlir_test.cc +++ b/third_party/xla/xla/service/gpu/fusions/loop_mlir_test.cc @@ -55,7 +55,7 @@ TEST_F(MlirLoopFusionTest, ThreadId_IndexingUnrolled) { MatchIndexingString(R"( (th_x, th_y, th_z, bl_x, bl_y, bl_z)[chunk_id, unroll_id] -> ( (((bl_x * 16 + th_x floordiv 8) floordiv 3 + chunk_id * 5376) floordiv 625) mod 100, - (((th_x + bl_x * 128) floordiv 3 + chunk_id * 43008) floordiv 25) mod 200, + (((bl_x * 128 + th_x) floordiv 3 + chunk_id * 43008) floordiv 25) mod 200, (th_x * 4 + bl_x * 512 + chunk_id * 516096) mod 300 + unroll_id ) domain: @@ -150,7 +150,7 @@ TEST_F(MlirLoopFusionTest, ThreadId_Broadcast) { (th_x, th_y, th_z, bl_x, bl_y, bl_z)[chunk_id, unroll_id] -> ( ((bl_x * 16 + th_x floordiv 8) floordiv 75) mod 10, ((bl_x * 64 + th_x floordiv 2) floordiv 15) mod 20, - (th_x + bl_x * 128) mod 30) + (bl_x * 128 + th_x) mod 30) domain: th_x in [0, 127] th_y in [0, 0] diff --git a/third_party/xla/xla/service/gpu/fusions/loop_test.cc b/third_party/xla/xla/service/gpu/fusions/loop_test.cc index d87bc8fa0db98e..4507879c2310ea 100644 --- a/third_party/xla/xla/service/gpu/fusions/loop_test.cc +++ b/third_party/xla/xla/service/gpu/fusions/loop_test.cc @@ -90,7 +90,7 @@ TEST_F(LoopTest, ThreadIndexingUnrolled) { MatchIndexingString(R"( (th_x, th_y, th_z, bl_x, bl_y, bl_z)[chunk_id, unroll_id] -> ( (((bl_x * 16 + th_x floordiv 8) floordiv 3 + chunk_id * 5376) floordiv 625) mod 100, - (((th_x + bl_x * 128) floordiv 3 + chunk_id * 43008) floordiv 25) mod 200, + (((bl_x * 128 + th_x) floordiv 3 + chunk_id * 43008) floordiv 25) mod 200, (th_x * 4 + bl_x * 512 + chunk_id * 516096) mod 300 + unroll_id ) domain: @@ -186,7 +186,7 @@ TEST_F(LoopTest, Broadcast) { (th_x, th_y, th_z, bl_x, bl_y, bl_z)[chunk_id, unroll_id] -> ( ((bl_x * 16 + th_x floordiv 8) floordiv 75) mod 10, ((bl_x * 64 + th_x floordiv 2) floordiv 15) mod 20, - (th_x + bl_x * 128) mod 30) + (bl_x * 128 + th_x) mod 30) domain: th_x in [0, 127] th_y in [0, 0] diff --git a/third_party/xla/xla/service/gpu/fusions/reduction_base_test.cc b/third_party/xla/xla/service/gpu/fusions/reduction_base_test.cc index aefb4ad8219313..6a43851f555bc7 100644 --- a/third_party/xla/xla/service/gpu/fusions/reduction_base_test.cc +++ b/third_party/xla/xla/service/gpu/fusions/reduction_base_test.cc @@ -330,7 +330,7 @@ TEST_F(ReductionTest, ThreadIndexingSideOutput) { constexpr char kExpectedIndexing[] = R"( (d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3] -> ( d3 floordiv 8, - d0 floordiv 32 + (d3 mod 8) * 8, + (d3 mod 8) * 8 + d0 floordiv 32, (d0 mod 32) * 2 + s2 * 64 + s3 ) domain: diff --git a/third_party/xla/xla/service/gpu/fusions/scatter_mlir_test.cc b/third_party/xla/xla/service/gpu/fusions/scatter_mlir_test.cc index 713314969c8d82..86ce79992c819c 100644 --- a/third_party/xla/xla/service/gpu/fusions/scatter_mlir_test.cc +++ b/third_party/xla/xla/service/gpu/fusions/scatter_mlir_test.cc @@ -82,7 +82,7 @@ TEST_F(MlirScatterFusionTest, ThreadIdIndexing) { (th_x, th_y, th_z, bl_x, bl_y, bl_z)[chunk_id, unroll_id] -> ( ((bl_x * 16 + th_x floordiv 8) floordiv 25) mod 42, ((bl_x * 32 + th_x floordiv 4) floordiv 5) mod 10, - (th_x + bl_x * 128) mod 20) + (bl_x * 128 + th_x) mod 20) 
domain: th_x in [0, 127] th_y in [0, 0] diff --git a/third_party/xla/xla/service/gpu/fusions/scatter_test.cc b/third_party/xla/xla/service/gpu/fusions/scatter_test.cc index 2be8dc86d75540..07e2da47a6a6b0 100644 --- a/third_party/xla/xla/service/gpu/fusions/scatter_test.cc +++ b/third_party/xla/xla/service/gpu/fusions/scatter_test.cc @@ -148,7 +148,7 @@ TEST_F(ScatterFusionTest, ThreadIdIndexing) { (th_x, th_y, th_z, bl_x, bl_y, bl_z)[chunk_id, unroll_id] -> ( ((bl_x * 16 + th_x floordiv 8) floordiv 25) mod 42, ((bl_x * 32 + th_x floordiv 4) floordiv 5) mod 10, - (th_x + bl_x * 128) mod 20) + (bl_x * 128 + th_x) mod 20) domain: th_x in [0, 127] th_y in [0, 0] diff --git a/third_party/xla/xla/service/gpu/fusions/transpose_mlir_test.cc b/third_party/xla/xla/service/gpu/fusions/transpose_mlir_test.cc index 64c66419c48a1b..086ebf8f2ad911 100644 --- a/third_party/xla/xla/service/gpu/fusions/transpose_mlir_test.cc +++ b/third_party/xla/xla/service/gpu/fusions/transpose_mlir_test.cc @@ -51,8 +51,8 @@ TEST_F(MlirTransposeFusionTest, ThreadIndexing021) { MatchIndexingString(R"( (d0, d1, d2, d3, d4, d5)[s0, s1, s2] -> ( d3 floordiv 2, - ### d0 floordiv 32 + s1 * 4 ###, - ### (d3 mod 2) * 32 + d0 mod 32 ### + d0 floordiv 32 + s1 * 4, + (d3 mod 2) * 32 + d0 mod 32 ) domain: d0 in [0, 127] @@ -71,7 +71,7 @@ TEST_F(MlirTransposeFusionTest, ThreadIndexing021) { MatchIndexingString(R"( (d0, d1, d2, d3, d4, d5)[s0, s1, s2] -> ( d3 floordiv 2, - ### d0 floordiv 32 + (d3 mod 2) * 32 + s1 * 4 ###, + (d3 mod 2) * 32 + s1 * 4 + d0 floordiv 32, d0 mod 32 ) domain: @@ -110,7 +110,7 @@ TEST_F(MlirTransposeFusionTest, ThreadIndexing201) { MatchIndexingString(R"( (d0, d1, d2, d3, d4, d5)[s0, s1, s2] -> ( d3 floordiv 2, - d0 floordiv 32 + (d3 * 32 + s1 * 4) mod 64, + (d3 * 32 + s1 * 4) mod 64 + d0 floordiv 32, d0 mod 32 ) domain: @@ -129,9 +129,9 @@ TEST_F(MlirTransposeFusionTest, ThreadIndexing201) { fusion.ComputeThreadIdToOutputIndexing(0, &mlir_context_)->ToString(), MatchIndexingString(R"( (d0, d1, d2, d3, d4, d5)[s0, s1, s2] -> ( - ### d0 floordiv 32 + s1 * 4 ###, + d0 floordiv 32 + s1 * 4, d3 floordiv 2, - ### (d3 mod 2) * 32 + d0 mod 32 ### + (d3 mod 2) * 32 + d0 mod 32 ) domain: d0 in [0, 127] diff --git a/third_party/xla/xla/service/gpu/fusions/transpose_test.cc b/third_party/xla/xla/service/gpu/fusions/transpose_test.cc index dbafd613d15b2b..a9b40704ff7842 100644 --- a/third_party/xla/xla/service/gpu/fusions/transpose_test.cc +++ b/third_party/xla/xla/service/gpu/fusions/transpose_test.cc @@ -97,7 +97,7 @@ TEST_F(TransposeTest, ThreadIndexing021) { MatchIndexingString(R"( (d0, d1, d2, d3, d4, d5)[s0, s1, s2] -> ( d3 floordiv 2, - d0 floordiv 32 + (d3 mod 2) * 32 + s1 * 4, + (d3 mod 2) * 32 + s1 * 4 + d0 floordiv 32, d0 mod 32 ) domain: @@ -139,7 +139,7 @@ TEST_F(TransposeTest, ThreadIndexing201) { MatchIndexingString(R"( (d0, d1, d2, d3, d4, d5)[s0, s1, s2] -> ( d3 floordiv 2, - d0 floordiv 32 + (d3 * 32 + s1 * 4) mod 64, + (d3 * 32 + s1 * 4) mod 64 + d0 floordiv 32, d0 mod 32 ) domain: diff --git a/third_party/xla/xla/service/gpu/model/indexing_analysis_test.cc b/third_party/xla/xla/service/gpu/model/indexing_analysis_test.cc index 0413c8280786cc..daddff1f4c1436 100644 --- a/third_party/xla/xla/service/gpu/model/indexing_analysis_test.cc +++ b/third_party/xla/xla/service/gpu/model/indexing_analysis_test.cc @@ -1643,7 +1643,7 @@ TEST_F(IndexingAnalysisTest, ReshapeOpGenericReshape2DTo3D) { EXPECT_THAT(input_indexing.indexing_maps, ElementsAre(ElementsAre(MatchIndexingMap(R"( (d0, d1, d2) -> (d0 * 2 + d1 
floordiv 2, - d2 + (d1 mod 2) * 4) + (d1 mod 2) * 4 + d2) domain: d0 in [0, 1] d1 in [0, 3] @@ -1662,7 +1662,7 @@ TEST_F(IndexingAnalysisTest, ReshapeOpGenericReshape3DTo2D) { EXPECT_THAT(input_indexing.indexing_maps, ElementsAre(ElementsAre(MatchIndexingMap(R"( (d0, d1) -> (d0 floordiv 2, - d1 floordiv 4 + (d0 mod 2) * 2, + (d0 mod 2) * 2 + d1 floordiv 4, d1 mod 4) domain: d0 in [0, 3] @@ -2615,7 +2615,7 @@ TEST_F(IndexingAnalysisTest, TilingIndexing) { EXPECT_THAT(indexing_map.ToString(), MatchIndexingString(R"( (d0, d1, d2, d3, d4, d5)[s0, s1, s2] -> ( (d3 floordiv 64) * 8 + s0, - d0 floordiv 4 + (d3 mod 64) * 4, + (d3 mod 64) * 4 + d0 floordiv 4, d0 mod 4 + s2 * 4 ) domain: @@ -2659,7 +2659,7 @@ TEST_F(IndexingAnalysisTest, EpilogueIndexing) { ComputeEpilogueInputToOutputIndexing(transpose, log, &mlir_context_) .ToString(), MatchIndexingString(R"( - (d0, d1) -> (d0 + d1 * 1000) + (d0, d1) -> (d1 * 1000 + d0) domain: d0 in [0, 999] d1 in [0, 999] diff --git a/third_party/xla/xla/service/gpu/model/indexing_map.cc b/third_party/xla/xla/service/gpu/model/indexing_map.cc index f12f923342276c..80390bb6c119e8 100644 --- a/third_party/xla/xla/service/gpu/model/indexing_map.cc +++ b/third_party/xla/xla/service/gpu/model/indexing_map.cc @@ -32,6 +32,7 @@ limitations under the License. #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/raw_ostream.h" #include "mlir/IR/AffineExpr.h" // from @llvm-project #include "mlir/IR/AffineMap.h" // from @llvm-project #include "mlir/IR/MLIRContext.h" // from @llvm-project @@ -359,6 +360,69 @@ AffineExpr AffineExprSimplifier::RewriteSumIf( return pred(expr) ? expr : mlir::getAffineConstantExpr(0, expr.getContext()); } +// Compares the two expressions by their AST. The ordering is arbitrary but +// similar to what MLIR's simplifier does. +int CompareExprs(AffineExpr a, AffineExpr b) { + if ((b.getKind() == AffineExprKind::Constant) != + (a.getKind() == AffineExprKind::Constant)) { + return a.getKind() == AffineExprKind::Constant ? 1 : -1; + } + if (a.getKind() < b.getKind()) { + return -1; + } + if (a.getKind() > b.getKind()) { + return 1; + } + assert(a.getKind() == b.getKind()); + int64_t a_value = 0; + int64_t b_value = 0; + switch (a.getKind()) { + case AffineExprKind::Add: + case AffineExprKind::FloorDiv: + case AffineExprKind::CeilDiv: + case AffineExprKind::Mul: + case AffineExprKind::Mod: { + auto a_bin = mlir::cast(a); + auto b_bin = mlir::cast(b); + auto lhs = CompareExprs(a_bin.getLHS(), b_bin.getLHS()); + if (lhs != 0) { + return lhs; + } + return CompareExprs(a_bin.getRHS(), b_bin.getRHS()); + } + case AffineExprKind::Constant: { + a_value = mlir::cast(a).getValue(); + b_value = mlir::cast(b).getValue(); + break; + } + case AffineExprKind::SymbolId: { + a_value = mlir::cast(a).getPosition(); + b_value = mlir::cast(b).getPosition(); + break; + } + case AffineExprKind::DimId: { + a_value = mlir::cast(a).getPosition(); + b_value = mlir::cast(b).getPosition(); + break; + } + } + return a_value < b_value ? -1 : (a_value > b_value ?
1 : 0); +} + +AffineExpr CanonicalizeOrder(AffineExpr in) { + if (auto binop = mlir::dyn_cast(in)) { + auto lhs = CanonicalizeOrder(binop.getLHS()); + auto rhs = CanonicalizeOrder(binop.getRHS()); + if ((binop.getKind() == AffineExprKind::Add || + binop.getKind() == AffineExprKind::Mul) && + CompareExprs(lhs, rhs) > 0) { + std::swap(lhs, rhs); + } + return getAffineBinaryOpExpr(binop.getKind(), lhs, rhs); + } + return in; +} + AffineExpr AffineExprSimplifier::SimplifyOnce(AffineExpr expr) { switch (expr.getKind()) { case AffineExprKind::Mul: { @@ -478,7 +542,11 @@ AffineMap AffineExprSimplifier::Simplify(AffineMap affine_map) { results.push_back(simplified); } if (nothing_changed) { - return affine_map; + for (auto& result : results) { + result = CanonicalizeOrder(result); + } + return AffineMap::get(affine_map.getNumDims(), affine_map.getNumSymbols(), + results, affine_map.getContext()); } return Simplify(AffineMap::get(affine_map.getNumDims(), affine_map.getNumSymbols(), results, diff --git a/third_party/xla/xla/service/gpu/model/indexing_map_test.cc b/third_party/xla/xla/service/gpu/model/indexing_map_test.cc index 20807546347c0c..f753d945cf15d2 100644 --- a/third_party/xla/xla/service/gpu/model/indexing_map_test.cc +++ b/third_party/xla/xla/service/gpu/model/indexing_map_test.cc @@ -575,7 +575,7 @@ TEST_F(IndexingMapTest, ParseAffineMap(serialized_map, &mlir_context_), {10, 10, 10}, {}); indexing_map.Simplify(GetIndexingMapForInstruction); EXPECT_THAT(indexing_map.ToString(printer_), MatchIndexingString(R"( - (d0, d1, d2) -> (d0 * 2 + (d1 + d2 floordiv 4) floordiv 2, + (d0, d1, d2) -> (d0 * 2 + (d2 floordiv 4 + d1) floordiv 2, (d1 * 4 + d2) mod 8) domain: d0 in [0, 9] @@ -619,7 +619,7 @@ TEST_F(IndexingMapTest, AffineMapSimplification_SimplifyReshape_Regression) { ParseAffineMap(serialized_map, &mlir_context_), {}, {128}); indexing_map.Simplify(GetIndexingMapForInstruction); EXPECT_THAT(indexing_map.ToString(printer_), MatchIndexingString(R"( - ()[s0] -> ((s0 * 128) mod 715 + ((s0 * 64) floordiv 715) * 715) + ()[s0] -> (((s0 * 64) floordiv 715) * 715 + (s0 * 128) mod 715) domain: s0 in [0, 127] )")); } @@ -663,7 +663,7 @@ TEST_F(IndexingMapTest, AffineMapSimplification_ExtractFromMod) { indexing_map.Simplify(GetIndexingMapForInstruction); EXPECT_THAT(indexing_map.ToString(printer_), MatchIndexingString(R"( ()[s0, s1, s2, s3] -> ( - s1 + (s0 * 458752 + s2 * 4 + s3 * 512) mod 20000 + (s0 * 458752 + s2 * 4 + s3 * 512) mod 20000 + s1 ) domain: s0 in [0, 871] diff --git a/third_party/xla/xla/service/gpu/model/indexing_test_utils.cc b/third_party/xla/xla/service/gpu/model/indexing_test_utils.cc index 67163320e04275..72becac80c075e 100644 --- a/third_party/xla/xla/service/gpu/model/indexing_test_utils.cc +++ b/third_party/xla/xla/service/gpu/model/indexing_test_utils.cc @@ -140,78 +140,25 @@ AffineExpr ParseAffineExpr(absl::string_view serialized_affine_expr, .getResult(0); } -inline std::vector split_string(std::string s, - std::string pattern) { - std::vector result; - size_t pos = 0; - while ((pos = s.find(pattern)) != std::string::npos) { - result.push_back(s.substr(0, pos)); - s.erase(0, pos + pattern.length()); - } - if (!s.empty()) result.push_back(s); - return result; -} - -inline bool startswith(const std::string& s, const std::string& pattern) { - return s.substr(0, pattern.size()) == pattern; -} - bool ApproximateMatch(std::string_view lhs, std::string_view rhs) { - std::string lhs_unspaced, rhs_unspaced; - for (auto c : lhs) { - if (!std::isspace(c)) { - lhs_unspaced += c; + 
size_t lhs_length = lhs.size(); + size_t rhs_length = rhs.size(); + size_t l = 0, r = 0; + while (l < lhs_length && r < rhs_length) { + while (l < lhs_length && std::isspace(lhs[l])) { + ++l; } - } - for (auto c : rhs) { - if (!std::isspace(c)) { - rhs_unspaced += c; - } - } - - if (lhs_unspaced.find("###") == std::string::npos) - return lhs_unspaced == rhs_unspaced; - - std::vector frags = split_string(lhs_unspaced, "###"); - - while (frags.size() >= 2) { - if (!startswith(rhs_unspaced, frags[0])) { - return false; + while (r < rhs_length && std::isspace(rhs[r])) { + ++r; } - - rhs_unspaced = rhs_unspaced.substr(frags[0].size()); - - auto terms = split_string(frags[1], "+"); - // iterate through permutations of terms - std::vector indexes(terms.size()); - for (auto i = 0; i < terms.size(); i++) { - indexes[i] = i; + if (l == lhs_length || r == rhs_length) { + continue; } - bool match = false; - do { - std::string permuted = ""; - for (auto i : indexes) { - permuted += terms[i] + "+"; - } - permuted.pop_back(); - if (startswith(rhs_unspaced, permuted)) { - match = true; - break; - } - } while (std::next_permutation(indexes.begin(), indexes.end())); - - if (!match) { + if (lhs[l++] != rhs[r++]) { return false; } - - rhs_unspaced = rhs_unspaced.substr(frags[1].size()); - frags.erase(frags.begin()); - frags.erase(frags.begin()); } - if (frags.empty()) - return rhs_unspaced.empty(); - else - return rhs_unspaced == frags[0]; + return l == lhs_length && r == rhs_length; } } // namespace gpu From 055675be44b77097a01b8fff9275ce11df238918 Mon Sep 17 00:00:00 2001 From: Ilia Sergachev Date: Tue, 14 May 2024 04:13:51 -0700 Subject: [PATCH 106/478] PR #12434: [GPU] Fix OSS compilation problems in a previously disabled test. Imported from GitHub PR https://github.com/openxla/xla/pull/12434 Copybara import of the project: -- 723c9bb29adfcc33c015b74f90ce8024c2f79255 by Ilia Sergachev : [GPU] Fix OSS compilation problems in a previously disabled test. 
Merging this change closes #12434 PiperOrigin-RevId: 633533757 --- .../xla/xla/service/gpu/ir_emitter_triton_test.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/third_party/xla/xla/service/gpu/ir_emitter_triton_test.cc b/third_party/xla/xla/service/gpu/ir_emitter_triton_test.cc index 111f8e276e1de8..972b5cb57c8b6c 100644 --- a/third_party/xla/xla/service/gpu/ir_emitter_triton_test.cc +++ b/third_party/xla/xla/service/gpu/ir_emitter_triton_test.cc @@ -284,7 +284,7 @@ ENTRY e { })"; TritonGemmConfig config(16, 16, 32, 1, 1, 1); - EXPECT_OK( + TF_EXPECT_OK( CreateTritonIrAndFileCheck(kHloText, config, EmitMatMul, "triton_dot", R"( CHECK: tt.func @triton_fn(%[[LHS:.*]]: !tt.ptr {tt.divisibility = 16 : i32}, %[[RHS:.*]]: !tt.ptr {tt.divisibility = 16 : i32}, %[[OUT:.*]]: !tt.ptr {tt.divisibility = 16 : i32}) { CHECK-DAG: %[[ZERO_KN:.*]] = arith.constant dense<0.000000e+00> : tensor<32x16xf32> @@ -1083,8 +1083,8 @@ ENTRY main { ParseAndReturnVerifiedModule(kHloText)); TritonGemmConfig config(16, 64, 32, 1, 1, 1); - ASSERT_OK(CreateTritonIrAndFileCheck(kHloText, config, EmitSoftMax, - "triton_softmax_computation", R"( + TF_ASSERT_OK(CreateTritonIrAndFileCheck(kHloText, config, EmitSoftMax, + "triton_softmax_computation", R"( // CHECK: #[[MAP:.*]] = affine_map<()[s0] -> (s0 * 16)> // CHECK-LABEL: tt.func @triton_fn( // CHECK-SAME: %[[P0:[^:]*]]: !tt.ptr {tt.divisibility = 16 : i32}, @@ -4550,7 +4550,7 @@ ENTRY e { } )"; TritonGemmConfig config(32, 32, 32, 1, 1, 1); - ASSERT_OK( + TF_ASSERT_OK( CreateTritonIrAndFileCheck(kHloText, config, EmitMatMul, "triton_dot", R"( CHECK: %[[INFINITY:.*]] = arith.constant dense<0x7F800000> : tensor<32x32xf32> CHECK: %[[C_MASK:.*]] = arith.constant dense<-65536> : tensor<32x32xi32> @@ -4881,7 +4881,7 @@ ENTRY e { } )"; TritonGemmConfig config(32, 32, 32, 1, 1, 1); - ASSERT_OK( + TF_ASSERT_OK( CreateTritonIrAndFileCheck(kHloText, config, EmitMatMul, "triton_dot", R"( CHECK: %[[INFINITY:.*]] = arith.constant dense<0x7F800000> : tensor<32x32xf32> CHECK: %[[C_MASK:.*]] = arith.constant dense<-65536> : tensor<32x32xi32> From 614ea89256e015f28294ea081a76423ae480f6b9 Mon Sep 17 00:00:00 2001 From: Dmitri Gribenko Date: Tue, 14 May 2024 04:36:11 -0700 Subject: [PATCH 107/478] Integrate LLVM at llvm/llvm-project@79a6a7e28fff Updates LLVM usage to match [79a6a7e28fff](https://github.com/llvm/llvm-project/commit/79a6a7e28fff) PiperOrigin-RevId: 633538634 --- third_party/llvm/workspace.bzl | 4 ++-- third_party/stablehlo/temporary.patch | 22 +++++++++++++++++++ .../xla/third_party/stablehlo/temporary.patch | 22 +++++++++++++++++++ 3 files changed, 46 insertions(+), 2 deletions(-) diff --git a/third_party/llvm/workspace.bzl b/third_party/llvm/workspace.bzl index 669cff9644af1a..eb3a0f80219c81 100644 --- a/third_party/llvm/workspace.bzl +++ b/third_party/llvm/workspace.bzl @@ -4,8 +4,8 @@ load("//third_party:repo.bzl", "tf_http_archive") def repo(name): """Imports LLVM.""" - LLVM_COMMIT = "e6b2197a89f5d6d0f56a03c03b8afda561eee899" - LLVM_SHA256 = "a2398fc87e7c4ef96f17999a83240d421686718a8f095fed718b7539576c1fb7" + LLVM_COMMIT = "79a6a7e28fffd14e54a9a208af12d724b6eeb2d4" + LLVM_SHA256 = "ce0caa77cca929e29b0a6304820b833f4514a56bbfaaa533df846077916aa72d" tf_http_archive( name = name, diff --git a/third_party/stablehlo/temporary.patch b/third_party/stablehlo/temporary.patch index 2e395d99d26cf4..f906a856ae69e1 100755 --- a/third_party/stablehlo/temporary.patch +++ b/third_party/stablehlo/temporary.patch @@ -361,6 +361,28 @@ diff --ruN 
a/stablehlo/stablehlo/conversions/tosa/tests/binary.mlir b/stablehlo/ // CHECK: stablehlo.divide %0 = "stablehlo.divide"(%arg0, %arg1) : (tensor<10xf32>, tensor<10xf32>) -> tensor<10xf32> return %0 : tensor<10xf32> +@@ -123,7 +123,7 @@ + + // CHECK-LABEL: @maximum_f64 + func.func @maximum_f64(%arg0 : tensor<10xf64>, %arg1 : tensor<10xf64>) -> tensor<10xf64> { +- // CHECK: stablehlo.maximum ++ // CHECK: tosa.maximum + %0 = "stablehlo.maximum"(%arg0, %arg1) : (tensor<10xf64>, tensor<10xf64>) -> tensor<10xf64> + return %0 : tensor<10xf64> + } +diff --ruN a/stablehlo/stablehlo/conversions/tosa/tests/nullary.mlir b/stablehlo/stablehlo/conversions/tosa/tests/nullary.mlir +--- stablehlo/stablehlo/conversions/tosa/tests/nullary.mlir ++++ stablehlo/stablehlo/conversions/tosa/tests/nullary.mlir +@@ -9,8 +9,7 @@ + + // CHECK-LABEL: @constant_f64 + func.func @constant_f64() -> tensor<10xf64> { +- // TOSA does not support 64-bit types, so this should not legalize. +- // CHECK: stablehlo.constant ++ // CHECK: tosa.const + %0 = stablehlo.constant dense<0.000000e+00> : tensor<10xf64> + return %0 : tensor<10xf64> + } diff --ruN a/stablehlo/stablehlo/conversions/tosa/transforms/StablehloLegalizeToTosa.pdll b/stablehlo/stablehlo/conversions/tosa/transforms/StablehloLegalizeToTosa.pdll --- stablehlo/stablehlo/conversions/tosa/transforms/StablehloLegalizeToTosa.pdll +++ stablehlo/stablehlo/conversions/tosa/transforms/StablehloLegalizeToTosa.pdll diff --git a/third_party/xla/third_party/stablehlo/temporary.patch b/third_party/xla/third_party/stablehlo/temporary.patch index 2e395d99d26cf4..f906a856ae69e1 100755 --- a/third_party/xla/third_party/stablehlo/temporary.patch +++ b/third_party/xla/third_party/stablehlo/temporary.patch @@ -361,6 +361,28 @@ diff --ruN a/stablehlo/stablehlo/conversions/tosa/tests/binary.mlir b/stablehlo/ // CHECK: stablehlo.divide %0 = "stablehlo.divide"(%arg0, %arg1) : (tensor<10xf32>, tensor<10xf32>) -> tensor<10xf32> return %0 : tensor<10xf32> +@@ -123,7 +123,7 @@ + + // CHECK-LABEL: @maximum_f64 + func.func @maximum_f64(%arg0 : tensor<10xf64>, %arg1 : tensor<10xf64>) -> tensor<10xf64> { +- // CHECK: stablehlo.maximum ++ // CHECK: tosa.maximum + %0 = "stablehlo.maximum"(%arg0, %arg1) : (tensor<10xf64>, tensor<10xf64>) -> tensor<10xf64> + return %0 : tensor<10xf64> + } +diff --ruN a/stablehlo/stablehlo/conversions/tosa/tests/nullary.mlir b/stablehlo/stablehlo/conversions/tosa/tests/nullary.mlir +--- stablehlo/stablehlo/conversions/tosa/tests/nullary.mlir ++++ stablehlo/stablehlo/conversions/tosa/tests/nullary.mlir +@@ -9,8 +9,7 @@ + + // CHECK-LABEL: @constant_f64 + func.func @constant_f64() -> tensor<10xf64> { +- // TOSA does not support 64-bit types, so this should not legalize. +- // CHECK: stablehlo.constant ++ // CHECK: tosa.const + %0 = stablehlo.constant dense<0.000000e+00> : tensor<10xf64> + return %0 : tensor<10xf64> + } diff --ruN a/stablehlo/stablehlo/conversions/tosa/transforms/StablehloLegalizeToTosa.pdll b/stablehlo/stablehlo/conversions/tosa/transforms/StablehloLegalizeToTosa.pdll --- stablehlo/stablehlo/conversions/tosa/transforms/StablehloLegalizeToTosa.pdll +++ stablehlo/stablehlo/conversions/tosa/transforms/StablehloLegalizeToTosa.pdll From 63bb7339a2bc98af97c61e86eedbbd60c123529c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 May 2024 05:17:35 -0700 Subject: [PATCH 108/478] Update ops-related pbtxt files. 
PiperOrigin-RevId: 633547952 --- tensorflow/core/ops/ops.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 0556051577f5e4..7322cef85415f5 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -1,4 +1,4 @@ -go/debugstr +go/debugstr op { name: "Abort" attr { From e0e64ad7bbae544a07df6e52ca9794c4138b0554 Mon Sep 17 00:00:00 2001 From: Thomas Joerg Date: Tue, 14 May 2024 05:20:31 -0700 Subject: [PATCH 109/478] [XLA:GPU] Introduce `GpuCompiler::GetToolkitVersion` method for subclasses to provide the CUDA or ROCm versions. Remove all `#if GOOGLE_CUDA` and friends from gpu_compiler.cc PiperOrigin-RevId: 633548576 --- third_party/xla/xla/service/gpu/BUILD | 10 ++++----- .../xla/xla/service/gpu/amdgpu_compiler.cc | 8 +++++++ .../xla/xla/service/gpu/amdgpu_compiler.h | 2 ++ .../xla/xla/service/gpu/gpu_compiler.cc | 21 ++++--------------- .../xla/xla/service/gpu/gpu_compiler.h | 2 ++ .../xla/xla/service/gpu/nvptx_compiler.cc | 3 +++ .../xla/xla/service/gpu/nvptx_compiler.h | 2 ++ .../xla/xla/tests/llvm_compiler_test.cc | 4 ++++ 8 files changed, 29 insertions(+), 23 deletions(-) diff --git a/third_party/xla/xla/service/gpu/BUILD b/third_party/xla/xla/service/gpu/BUILD index e665640e8ce5eb..223aa418be67e7 100644 --- a/third_party/xla/xla/service/gpu/BUILD +++ b/third_party/xla/xla/service/gpu/BUILD @@ -3387,9 +3387,6 @@ cc_library( hdrs = if_gpu_is_configured([ "gpu_compiler.h", ]), - local_defines = if_cuda_is_configured(["GOOGLE_CUDA=1"]) + if_rocm_is_configured([ - "TENSORFLOW_USE_ROCM=1", - ]), deps = if_gpu_is_configured([ ":gpu_p2p_pipeliner", ":pipelined_p2p_rewriter", @@ -3460,7 +3457,6 @@ cc_library( "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/types:variant", - "@local_config_cuda//cuda:cuda_headers", "@llvm-project//llvm:AsmParser", "@llvm-project//llvm:BitReader", "@llvm-project//llvm:BitWriter", @@ -3579,7 +3575,6 @@ cc_library( "//xla/stream_executor:device_description", "//xla/stream_executor", "//xla/stream_executor/gpu:gpu_driver_header", - "//xla/stream_executor/cuda:cuda_platform_id", "//xla/stream_executor/integrations:device_mem_allocator", "//xla/translate/hlo_to_mhlo:hlo_utils", "//xla/translate/mhlo_to_hlo:location_exporter", @@ -3610,7 +3605,6 @@ cc_library( "//xla/service:hlo_ordering", "//xla/service:layout_assignment", "//xla/service:logical_buffer", - "//xla/stream_executor/rocm:rocm_platform_id", "@local_tsl//tsl/platform:numbers", ]) + xla_internal(["service:export_hlo"]) + [ ":command_buffer_scheduling", @@ -3787,6 +3781,7 @@ cc_library( "@com_google_absl//absl/cleanup", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:node_hash_map", + "@local_config_cuda//cuda:cuda_headers", "@com_google_absl//absl/log", "@com_google_absl//absl/log:check", "@com_google_absl//absl/status", @@ -3929,6 +3924,7 @@ cc_library( srcs = [ "amdgpu_compiler_registration.cc", ], + local_defines = if_rocm_is_configured(["TENSORFLOW_USE_ROCM=1"]), tags = ["manual"], deps = [ ":amdgpu_compiler_impl", @@ -4017,6 +4013,8 @@ cc_library( "//xla/stream_executor:device_description", "//xla/stream_executor:stream_executor_headers", "//xla/stream_executor/rocm:rocm_platform_id", + "@com_google_absl//absl/log", + "@com_google_absl//absl/log:check", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", "@llvm-project//llvm:ir_headers", diff --git
a/third_party/xla/xla/service/gpu/amdgpu_compiler.cc b/third_party/xla/xla/service/gpu/amdgpu_compiler.cc index 2a59d086d0223a..f8621e76610000 100644 --- a/third_party/xla/xla/service/gpu/amdgpu_compiler.cc +++ b/third_party/xla/xla/service/gpu/amdgpu_compiler.cc @@ -18,6 +18,7 @@ limitations under the License. #include #include +#include "absl/log/log.h" #include "absl/status/status.h" #include "absl/status/statusor.h" #include "llvm/IR/Module.h" @@ -90,6 +91,13 @@ struct ConvBfloat16Support : public FloatSupport { } // namespace +int32_t AMDGPUCompiler::GetToolkitVersion() const { +#if TENSORFLOW_USE_ROCM + return TF_ROCM_VERSION; +#endif + LOG(FATAL) << "Failed to get ROCm version."; +} + absl::Status AMDGPUCompiler::OptimizeHloConvolutionCanonicalization( HloModule* hlo_module, se::GpuComputeCapability gpu_version, se::dnn::VersionInfo dnn_version, diff --git a/third_party/xla/xla/service/gpu/amdgpu_compiler.h b/third_party/xla/xla/service/gpu/amdgpu_compiler.h index 9f4ad9f656256f..477ac1aeb1028d 100644 --- a/third_party/xla/xla/service/gpu/amdgpu_compiler.h +++ b/third_party/xla/xla/service/gpu/amdgpu_compiler.h @@ -39,6 +39,8 @@ class AMDGPUCompiler : public GpuCompiler { public: AMDGPUCompiler(); + int32_t GetToolkitVersion() const override; + absl::Status OptimizeHloConvolutionCanonicalization( HloModule* hlo_module, se::GpuComputeCapability gpu_version, se::dnn::VersionInfo dnn_version, diff --git a/third_party/xla/xla/service/gpu/gpu_compiler.cc b/third_party/xla/xla/service/gpu/gpu_compiler.cc index 6c3e2e70e05422..55edb1e4b00336 100644 --- a/third_party/xla/xla/service/gpu/gpu_compiler.cc +++ b/third_party/xla/xla/service/gpu/gpu_compiler.cc @@ -246,13 +246,6 @@ limitations under the License. #include "tsl/platform/threadpool.h" #include "tsl/profiler/lib/traceme.h" -#if GOOGLE_CUDA -#include "third_party/gpus/cuda/include/cuda.h" -#include "xla/stream_executor/cuda/cuda_platform_id.h" -#elif TENSORFLOW_USE_ROCM -#include "xla/stream_executor/rocm/rocm_platform_id.h" -#endif - #ifdef PLATFORM_GOOGLE #include "xla/hlo/experimental/auto_sharding/auto_sharding.h" #endif // PLATFORM_GOOGLE @@ -2084,11 +2077,9 @@ absl::StatusOr> GpuCompiler::RunBackend( absl::StatusOr>> GpuCompiler::CompileAheadOfTime(std::unique_ptr module_group, const AotCompilationOptions& options) { -#if GOOGLE_CUDA - CHECK(options.PlatformId() == se::cuda::kCudaPlatformId); -#elif TENSORFLOW_USE_ROCM - CHECK(options.PlatformId() == se::rocm::kROCmPlatformId); -#endif + // Check that we are on the platform (CUDA or ROCm) that was chosen for AOT + // compilation. 
+ CHECK_EQ(options.PlatformId(), PlatformId()); std::vector> modules = module_group->ConsumeModules(); @@ -2223,11 +2214,7 @@ absl::Status GpuCompiler::RunPostSchedulingPipelines( { HloPassPipeline pipeline("command-buffer-scheduling"); auto driver_version = se::gpu::GpuDriver::GetDriverVersion(); -#if GOOGLE_CUDA - constexpr int toolkit_version = CUDA_VERSION; -#else - constexpr int toolkit_version = TF_ROCM_VERSION; -#endif + const int32_t toolkit_version = GetToolkitVersion(); pipeline.AddPass( gpu_device_info, toolkit_version, driver_version.value_or(toolkit_version)); diff --git a/third_party/xla/xla/service/gpu/gpu_compiler.h b/third_party/xla/xla/service/gpu/gpu_compiler.h index 9d30a471deda8a..6ee9de66fc851f 100644 --- a/third_party/xla/xla/service/gpu/gpu_compiler.h +++ b/third_party/xla/xla/service/gpu/gpu_compiler.h @@ -116,6 +116,8 @@ class GpuCompiler : public LLVMCompiler { return &FusionCanShareBufferHint; } + virtual int32_t GetToolkitVersion() const = 0; + protected: struct BackendCompileResult { std::string asm_text; diff --git a/third_party/xla/xla/service/gpu/nvptx_compiler.cc b/third_party/xla/xla/service/gpu/nvptx_compiler.cc index 6a1fa45926b64c..94b1314fd6cac5 100644 --- a/third_party/xla/xla/service/gpu/nvptx_compiler.cc +++ b/third_party/xla/xla/service/gpu/nvptx_compiler.cc @@ -36,6 +36,7 @@ limitations under the License. #include "absl/strings/str_format.h" #include "absl/strings/string_view.h" #include "absl/synchronization/mutex.h" +#include "third_party/gpus/cuda/include/cuda.h" #include "llvm/IRReader/IRReader.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" @@ -175,6 +176,8 @@ class MatmulBfloat16Support : public FloatSupport { } // namespace +int32_t NVPTXCompiler::GetToolkitVersion() const { return CUDA_VERSION; } + absl::Status NVPTXCompiler::OptimizeHloConvolutionCanonicalization( HloModule* hlo_module, se::GpuComputeCapability gpu_version, se::dnn::VersionInfo dnn_version, diff --git a/third_party/xla/xla/service/gpu/nvptx_compiler.h b/third_party/xla/xla/service/gpu/nvptx_compiler.h index 3d7a770282b134..c25c850a915003 100644 --- a/third_party/xla/xla/service/gpu/nvptx_compiler.h +++ b/third_party/xla/xla/service/gpu/nvptx_compiler.h @@ -53,6 +53,8 @@ class NVPTXCompiler : public GpuCompiler { public: NVPTXCompiler(); + int32_t GetToolkitVersion() const override; + absl::Status OptimizeHloConvolutionCanonicalization( HloModule* hlo_module, se::GpuComputeCapability gpu_version, se::dnn::VersionInfo dnn_version, diff --git a/third_party/xla/xla/tests/llvm_compiler_test.cc b/third_party/xla/xla/tests/llvm_compiler_test.cc index 5f826254f2524b..32b2a5be85e3d1 100644 --- a/third_party/xla/xla/tests/llvm_compiler_test.cc +++ b/third_party/xla/xla/tests/llvm_compiler_test.cc @@ -15,6 +15,7 @@ limitations under the License. 
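The shape of this patch is a textbook virtual-hook refactor: the toolkit-version query moves behind `GpuCompiler::GetToolkitVersion()`, so only the NVPTX and AMDGPU subclasses include the CUDA/ROCm headers, and gpu_compiler.cc loses its `#if GOOGLE_CUDA` / `TENSORFLOW_USE_ROCM` branches. A reduced sketch of the pattern (the class names echo the diff; the version constants are placeholders, not real toolkit values):

#include <cstdint>
#include <iostream>

// Base class: no preprocessor branching on the GPU platform.
class Compiler {
 public:
  virtual ~Compiler() = default;
  virtual int32_t GetToolkitVersion() const = 0;
  void Schedule() const {
    // Previously an #if GOOGLE_CUDA ... #else ... #endif block.
    std::cout << "toolkit version: " << GetToolkitVersion() << "\n";
  }
};

// Each backend pins the platform-specific constant in its own file.
class CudaCompiler : public Compiler {
 public:
  int32_t GetToolkitVersion() const override { return 12030; }  // e.g. CUDA_VERSION
};

class RocmCompiler : public Compiler {
 public:
  int32_t GetToolkitVersion() const override { return 60000; }  // e.g. TF_ROCM_VERSION
};

int main() {
  CudaCompiler cuda;
  RocmCompiler rocm;
  cuda.Schedule();  // toolkit version: 12030
  rocm.Schedule();  // toolkit version: 60000
}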
#include "xla/service/llvm_compiler.h" +#include #include #include #include @@ -55,6 +56,9 @@ class GpuDummyCompiler : public GpuCompiler { public: GpuDummyCompiler() : GpuCompiler(kGpuPlatformId, kDummyTriple, kDummyLayout) {} + + int32_t GetToolkitVersion() const override { return 0; } + Status OptimizeHloConvolutionCanonicalization( HloModule* hlo_module, se::GpuComputeCapability gpu_version, se::dnn::VersionInfo dnn_version, From 08a13d2991724a4b0039237ce07340e16b760656 Mon Sep 17 00:00:00 2001 From: Dmitri Gribenko Date: Tue, 14 May 2024 06:16:08 -0700 Subject: [PATCH 110/478] Integrate LLVM at llvm/llvm-project@e6d3a4212d20 Updates LLVM usage to match [e6d3a4212d20](https://github.com/llvm/llvm-project/commit/e6d3a4212d20) PiperOrigin-RevId: 633561628 --- third_party/llvm/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/third_party/llvm/workspace.bzl b/third_party/llvm/workspace.bzl index eb3a0f80219c81..8d2c0c7e5a2254 100644 --- a/third_party/llvm/workspace.bzl +++ b/third_party/llvm/workspace.bzl @@ -4,8 +4,8 @@ load("//third_party:repo.bzl", "tf_http_archive") def repo(name): """Imports LLVM.""" - LLVM_COMMIT = "79a6a7e28fffd14e54a9a208af12d724b6eeb2d4" - LLVM_SHA256 = "ce0caa77cca929e29b0a6304820b833f4514a56bbfaaa533df846077916aa72d" + LLVM_COMMIT = "e6d3a4212d20b49a8e63f11fedea79cccf261479" + LLVM_SHA256 = "a9581601d91836d182180e35a698550b9c8257eacead9e5cc7ec956128200fce" tf_http_archive( name = name, From 84fc9613ab556faeec2f3c7453e52f68cd083c51 Mon Sep 17 00:00:00 2001 From: Christian Sigg Date: Tue, 14 May 2024 06:33:36 -0700 Subject: [PATCH 111/478] Integrate Triton up to [@4caedc2](https://github.com/openai/triton/commits/4caedc2a51f5c81f7b63212b9bcb76b430102913) PiperOrigin-RevId: 633565503 --- third_party/triton/temporary/pipelining.patch | 472 ------------------ third_party/triton/temporary/series.bzl | 2 - .../triton/temporary/support_ceil_op.patch | 138 ----- third_party/triton/workspace.bzl | 4 +- .../triton/xla_extensions/env_vars.patch | 14 - third_party/triton/xla_extensions/series.bzl | 1 - .../xla_extensions/sparse_dot_base.patch | 99 ++-- .../sparse_dot_fixes_y24w17.patch | 58 +-- .../xla_extensions/sparse_dot_nvgpu.patch | 32 +- .../xla_extensions/sparse_dot_passes.patch | 142 +++--- .../triton/temporary/pipelining.patch | 472 ------------------ .../third_party/triton/temporary/series.bzl | 2 - .../triton/temporary/support_ceil_op.patch | 138 ----- .../xla/third_party/triton/workspace.bzl | 4 +- .../triton/xla_extensions/env_vars.patch | 14 - .../triton/xla_extensions/series.bzl | 1 - .../xla_extensions/sparse_dot_base.patch | 99 ++-- .../sparse_dot_fixes_y24w17.patch | 58 +-- .../xla_extensions/sparse_dot_nvgpu.patch | 32 +- .../xla_extensions/sparse_dot_passes.patch | 142 +++--- .../xla/service/gpu/ir_emitter_triton_cuda.cc | 9 +- .../service/gpu/tests/sparse_add_layout.mlir | 4 +- 22 files changed, 346 insertions(+), 1591 deletions(-) delete mode 100644 third_party/triton/temporary/pipelining.patch delete mode 100644 third_party/triton/temporary/support_ceil_op.patch delete mode 100644 third_party/triton/xla_extensions/env_vars.patch delete mode 100644 third_party/xla/third_party/triton/temporary/pipelining.patch delete mode 100644 third_party/xla/third_party/triton/temporary/support_ceil_op.patch delete mode 100644 third_party/xla/third_party/triton/xla_extensions/env_vars.patch diff --git a/third_party/triton/temporary/pipelining.patch b/third_party/triton/temporary/pipelining.patch deleted file mode 100644 index 
9f5f36aeb5099d..00000000000000 --- a/third_party/triton/temporary/pipelining.patch +++ /dev/null @@ -1,472 +0,0 @@ -This is patching changes upstream from different PRs that fix issues with -pipelining internally. Required changes are upto and including this commit -https://github.com/openai/triton/commit/70f0b7b6e333fe2155c79dfa8bec6ad388073670 -The patch can be removed with the integration that includes these changes. - -diff --git a/include/triton/Analysis/Utility.h b/include/triton/Analysis/Utility.h ---- a/include/triton/Analysis/Utility.h -+++ b/include/triton/Analysis/Utility.h -@@ -8,6 +8,18 @@ - - namespace mlir { - -+inline bool isZeroConst(Value v) { -+ auto constantOp = v.getDefiningOp(); -+ if (!constantOp) -+ return false; -+ if (auto denseAttr = dyn_cast(constantOp.getValueAttr())) -+ return denseAttr.isSplat() && denseAttr.getSplatValue().isZero(); -+ if (auto denseAttr = -+ dyn_cast(constantOp.getValueAttr())) -+ return denseAttr.isSplat() && denseAttr.getSplatValue().isZero(); -+ return false; -+} -+ - class ReduceOpHelper { - public: - explicit ReduceOpHelper(triton::ReduceOp op) -diff --git a/include/triton/Dialect/TritonGPU/IR/TritonGPUOps.td b/include/triton/Dialect/TritonGPU/IR/TritonGPUOps.td ---- a/include/triton/Dialect/TritonGPU/IR/TritonGPUOps.td -+++ b/include/triton/Dialect/TritonGPU/IR/TritonGPUOps.td -@@ -45,6 +45,8 @@ def TTG_AsyncWaitOp : TTG_Op<"async_wait - - let arguments = (ins Variadic:$asyncToken, I32Attr:$num); - -+ let results = (outs TTG_AsyncToken:$retToken); -+ - let assemblyFormat = "$asyncToken attr-dict"; - - let extraClassDeclaration = [{ -@@ -229,10 +231,16 @@ def TTG_LocalLoadOp : TTG_Op<"local_load - let description = [{ - Load a tensor from the local memory descriptor into a distributed tensor. - }]; -- let arguments = (ins TT_MemDescType:$src); -+ let arguments = (ins TT_MemDescType:$src, Optional :$token); -+ -+ let builders = [ -+ OpBuilder<(ins "Type":$retType, "Value":$src), -+ [{ -+ build($_builder, $_state, retType, src, /*token=*/static_cast(nullptr)); -+ }]>]; - - // Use qualified() otherwise "!tt.memdesc" is printed as "". -- let assemblyFormat = [{$src attr-dict `:` qualified(type($src)) `->` type($result)}]; -+ let assemblyFormat = [{$src (`token` $token^)? attr-dict `:` qualified(type($src)) `->` type($result)}]; - - let results = (outs TT_Tensor:$result); - } -diff --git a/lib/Dialect/TritonGPU/Transforms/Pipeliner/MatmulLoopPipeline.cpp b/lib/Dialect/TritonGPU/Transforms/Pipeliner/MatmulLoopPipeline.cpp ---- a/lib/Dialect/TritonGPU/Transforms/Pipeliner/MatmulLoopPipeline.cpp -+++ b/lib/Dialect/TritonGPU/Transforms/Pipeliner/MatmulLoopPipeline.cpp -@@ -8,6 +8,7 @@ - #include "mlir/Interfaces/SideEffectInterfaces.h" - #include "mlir/Support/LLVM.h" - #include "triton/Analysis/AxisInfo.h" -+#include "triton/Analysis/Utility.h" - #include "triton/Dialect/Triton/IR/Types.h" - #include "triton/Dialect/Triton/IR/Utility.h" - #include "triton/Dialect/TritonGPU/IR/Attributes.h" -@@ -84,12 +85,13 @@ createAsyncCopy(scf::ForOp &forOp, tt::L - Location loc = loadOp.getLoc(); - Value src = loadOp.getPtr(); - Value mask = loadOp.getMask(); -+ Value other = loadOp.getOther(); - if (!isExpensiveLoadOrStore(loadOp) && opToInfo[loadOp].blockedEncoding) { - // For inexpensive loads that do not directly feed into dot ops - // we want to use optimal layout for the data. 
- ttg::BlockedEncodingAttr encoding = opToInfo[loadOp].blockedEncoding; - auto convertBlockLayout = [&](Value src) { -- auto ty = src.getType().cast(); -+ auto ty = cast(src.getType()); - auto newTy = - RankedTensorType::get(ty.getShape(), ty.getElementType(), encoding); - auto cvt = -@@ -99,9 +101,11 @@ createAsyncCopy(scf::ForOp &forOp, tt::L - src = convertBlockLayout(src); - if (mask) - mask = convertBlockLayout(mask); -+ if (other) -+ other = convertBlockLayout(other); - } - -- tt::MemDescType allocTy = alloc.getType().cast(); -+ tt::MemDescType allocTy = cast(alloc.getType()); - SmallVector copyOffsets(allocTy.getRank(), zero); - copyOffsets[0] = insertIdx; - tt::MemDescType subviewTy = tt::MemDescType::get( -@@ -110,11 +114,12 @@ createAsyncCopy(scf::ForOp &forOp, tt::L - auto view = - builder.create(loc, subviewTy, alloc, copyOffsets); - Operation *copy = builder.create( -- loc, src, view, mask, loadOp.getOther(), loadOp.getCache(), -- loadOp.getEvict(), loadOp.getIsVolatile()); -+ loc, src, view, mask, other, loadOp.getCache(), loadOp.getEvict(), -+ loadOp.getIsVolatile()); - Operation *commmit = - builder.create(loc, copy->getResult(0)); -- builder.create(loc, commmit->getResult(0), 0); -+ Operation *wait = -+ builder.create(loc, commmit->getResult(0), 0); - - int stage = opToInfo[loadOp].stage; - bool isMMV3Load = opToInfo[loadOp].loadIsMMAV3; -@@ -142,9 +147,21 @@ createAsyncCopy(scf::ForOp &forOp, tt::L - for (auto alloc : allocsToErase) { - alloc.erase(); - } -- auto sharedLoad = -- builder.create(loc, loadOp.getType(), viewLoad); -- loadOp->replaceAllUsesWith(sharedLoad->getResults()); -+ -+ auto sharedLoad = builder.create( -+ loc, loadOp.getType(), viewLoad, wait->getResult(0)); -+ auto result = sharedLoad->getResults(); -+ -+ // Create a select for non-zero other values as they are not handled by -+ // AsyncCopyGlobalToLocalOp for now. -+ Value other = loadOp.getOther(); -+ if (other && !isZeroConst(other)) { -+ auto select = builder.create( -+ loc, loadOp.getType(), mask, sharedLoad.getResult(), other); -+ result = select->getResults(); -+ } -+ -+ loadOp->replaceAllUsesWith(result); - } - loadOp.erase(); - } -@@ -160,7 +177,7 @@ getSharedEncIfAllUsersAreDotEnc(Value va - if (user->getNumResults() != 1) - return std::nullopt; - if (auto memDesc = -- user->getResult(0).getType().dyn_cast()) { -+ dyn_cast(user->getResult(0).getType())) { - // First time we find a shared encoding in the chain, save it and try to - // use it if it is compatible with the other users. 
- tempAttr = memDesc.getEncoding().cast(); -@@ -203,7 +220,7 @@ getSharedEncIfAllUsersAreDotEnc(Value va - static ttg::BlockedEncodingAttr - getBlockedEncoding(tt::LoadOp loadOp, tt::ModuleAxisInfoAnalysis &axisInfo) { - Value src = loadOp.getPtr(); -- auto ty = src.getType().cast(); -+ auto ty = cast(src.getType()); - auto mod = loadOp->getParentOfType(); - int numWarps = ttg::TritonGPUDialect::getNumWarps(mod); - int threadsPerWarp = ttg::TritonGPUDialect::getThreadsPerWarp(mod); -@@ -221,7 +238,7 @@ getBlockedEncoding(tt::LoadOp loadOp, tt - - static std::optional - getSharedEncoding(tt::LoadOp loadOp, bool isMMAV3) { -- auto ty = loadOp.getType().cast(); -+ auto ty = cast(loadOp.getType()); - auto ctaLayout = ttg::getCTALayout(ty.getEncoding()); - auto blockedOrder = ttg::getOrder(ty.getEncoding()); - SmallVector order; -@@ -285,11 +302,10 @@ loadOpsToDistanceAndUse(scf::ForOp forOp - if (auto mask = loadOp.getMask()) - vec = std::min(vec, axisInfoAnalysis.getMaskAlignment(mask)); - -- auto tensorTy = ptr.getType().dyn_cast(); -+ auto tensorTy = dyn_cast(ptr.getType()); - if (!tensorTy) - return false; -- auto ty = -- tensorTy.getElementType().cast().getPointeeType(); -+ auto ty = cast(tensorTy.getElementType()).getPointeeType(); - unsigned width = vec * ty.getIntOrFloatBitWidth(); - - // We do not pipeline all loads for the following reasons: -@@ -353,7 +369,7 @@ static bool loadIsMMAv3(tt::LoadOp loadO - - // MMA V3 case. - auto newOrder = sharedEnc.getOrder(); -- auto ty = loadOp.getType().cast(); -+ auto ty = cast(loadOp.getType()); - auto oldOrder = ttg::getOrder(ty.getEncoding()); - - // The operand of MMAv3 is in SharedEncoding and its order should not -@@ -497,7 +513,7 @@ collectOpsToPipeline(scf::ForOp forOp, - static Value createAlloc(scf::ForOp &forOp, tt::LoadOp loadOp, - ttg::SharedEncodingAttr sharedEnc, unsigned distance) { - OpBuilder builder(forOp); -- auto ty = loadOp.getType().cast(); -+ auto ty = cast(loadOp.getType()); - SmallVector bufferShape(ty.getShape().begin(), ty.getShape().end()); - bufferShape.insert(bufferShape.begin(), distance); - Type memdescType = mlir::triton::MemDescType::get( -@@ -669,12 +685,23 @@ createSchedule(scf::ForOp forOp, int num - } - }); - -+ auto getNestedOperands = [](Operation *op) -> SmallVector { -+ SmallVector operands; -+ op->walk([&](Operation *nestedOp) { -+ for (Value operand : nestedOp->getOperands()) { -+ if (operand.getParentBlock()->getParentOp()->isAncestor(nestedOp)) -+ operands.push_back(operand); -+ } -+ }); -+ return operands; -+ }; -+ - // Find dependencies with distance of 1. - SmallVector> distanceOneUsers(numStages); - for (int stage = 0; stage < numStages - 1; stage++) { - auto &group = insertAndDeps[stage]; - for (Operation *op : group) { -- for (Value operand : op->getOperands()) { -+ for (Value operand : getNestedOperands(op)) { - if (auto arg = operand.dyn_cast()) { - if (arg.getArgNumber() > 0 && arg.getOwner() == op->getBlock()) { - auto yieldOp = op->getBlock()->getTerminator(); -@@ -905,7 +932,7 @@ static int minNumInterleavedCommitOps(Op - // Look for consecutive wait ops and combine them into a single wait op. 
- static void - combineRedundantWaitOps(llvm::SmallSetVector &waitOps) { -- llvm::SmallSetVector toDelete; -+ llvm::MapVector toDelete; - for (auto waitOp : waitOps) { - if (toDelete.count(waitOp)) - continue; -@@ -927,10 +954,13 @@ combineRedundantWaitOps(llvm::SmallSetVe - OpBuilder builder(waitGroup.back()); - auto newWaitOp = builder.create(waitOp.getLoc(), - depTokens, minWaitNumber); -- toDelete.insert(waitGroup.begin(), waitGroup.end()); -+ for (auto waitOp : waitGroup) { -+ toDelete[waitOp] = newWaitOp; -+ } - } - for (auto waitOp : toDelete) { -- waitOp->erase(); -+ waitOp.first->replaceAllUsesWith(waitOp.second); -+ waitOp.first->erase(); - } - } - -@@ -1010,7 +1040,7 @@ static void threadValuesThroughWait(ttng - - for (ttng::DotAsyncOp dot : asyncDots) { - for (Value operand : dot.getOperands()) { -- if (operand.getType().isa()) { -+ if (isa(operand.getType())) { - newOperands.insert(operand); - } - } -@@ -1020,15 +1050,21 @@ static void threadValuesThroughWait(ttng - // values in the operation. - auto newWait = builder.create( - wait.getLoc(), llvm::to_vector(newOperands), wait.getPendings()); -+ -+ auto dominatedByNewWait = [&](OpOperand &operand) { -+ auto opInThisBlock = -+ newWait->getBlock()->findAncestorOpInBlock(*operand.getOwner()); -+ return opInThisBlock && newWait->isBeforeInBlock(opInThisBlock); -+ }; - for (int i = 0; i < origNumOperands; i++) { - Value operand = wait.getResult(i); -- if (!operand.getType().isa()) -+ if (!isa(operand.getType())) - operand.replaceAllUsesWith(newWait.getResult(i)); - } - for (int i = origNumOperands; i < newOperands.size(); i++) { - Value operand = newWait.getOperand(i); -- if (!operand.getType().isa()) -- operand.replaceAllUsesExcept(newWait.getResult(i), newWait); -+ if (!isa(operand.getType())) -+ operand.replaceUsesWithIf(newWait.getResult(i), dominatedByNewWait); - } - wait->erase(); - } -@@ -1047,8 +1083,8 @@ static void threadValuesThroughWait(ttng - // 1. All operands that touch shared memory are multi-buffered, i.e. can't read - // an incomplete value while it's being written asynchronously by a load. - // --// 2. During iteration i, nothing other than the loop's `yield` reads the --// result of the dot. -+// 2. If the dot is used by any op in the loop, it must be used under an `if`, -+// and will be synced with a `wait 0` at the beginning of the `if` block. - // - // 3. During iteration i, between the start of the loop up until the first - // `ttng.dot_wait {pendings=0}` op, the result of the dot from iteration i-1 -@@ -1079,7 +1115,7 @@ static std::optional dotCanBeProper - // Rule 1: All shmem operands are multi-buffered. - auto checkOperand = [&](Value operand) { - if (!isa( -- operand.getType().cast().getEncoding())) { -+ cast(operand.getType()).getEncoding())) { - return true; - } - -@@ -1103,17 +1139,41 @@ static std::optional dotCanBeProper - return std::nullopt; - } - -- // Rule 2: The dot should only be used by the for loop's `yield`. -- if (!dotOp->hasOneUse() || -- *dotOp->getUsers().begin() != forOp.getBody()->getTerminator()) { -- LDBG("Can't make dot async because it is not used only by the loop's " -- "`yield`."); -- return std::nullopt; -+ // Rule 2: The dot cannot be unconditionally used by any op in the loop. -+ // Uses under `if` are allowed, as can be explicitly synced with a `wait 0`. 
-+ int iterArgIdx = -1; -+ Value iterArg = nullptr; -+ SmallVector> queue; -+ for (auto &use : dotOp->getUses()) { -+ queue.push_back({use.getOwner(), use.getOperandNumber()}); - } -- -- // The result of the dot becomes this loop carry value. -- auto iterArgIdx = dotOp->getUses().begin()->getOperandNumber(); -- auto iterArg = forOp.getRegionIterArg(iterArgIdx); -+ while (!queue.empty()) { -+ auto [user, argIdx] = queue.pop_back_val(); -+ if (user->getParentOp() == forOp) { -+ if (isa(user)) { -+ if (iterArg) { -+ // The dot is used by the loop's yield, but we can't have any other -+ // uses. -+ return std::nullopt; -+ } -+ iterArgIdx = argIdx; -+ iterArg = forOp.getRegionIterArg(argIdx); -+ continue; -+ } -+ return std::nullopt; -+ } -+ if (auto ifOp = dyn_cast(user->getParentOp())) { -+ if (isa(user)) { -+ // The result is returned by the if, follow it further. -+ auto uses = ifOp.getResult(argIdx).getUses(); -+ for (auto &use : uses) { -+ queue.push_back({use.getOwner(), use.getOperandNumber()}); -+ } -+ } -+ } else { -+ return std::nullopt; -+ } -+ } - - // Rule 3a: Are the only users of the dot's result from iteration i-1 other - // MMAv3 dots? If so, we're done, this dot can be properly async. -@@ -1181,6 +1241,32 @@ static void insertAsyncDotWaitInLoop( - return; - } - -+ // Insert waits before the users of the properly async dots other than loop -+ // yield. -+ for (auto [asyncDot, iterArgIdx] : properlyAsyncDots) { -+ SmallVector uses; -+ for (auto &use : asyncDot->getUses()) { -+ if (auto yieldOp = dyn_cast(use.getOwner())) { -+ continue; -+ } -+ uses.push_back(&use); -+ } -+ -+ DenseMap> blockToUsers; -+ for (auto use : uses) { -+ auto block = use->getOwner()->getBlock(); -+ blockToUsers[block].push_back(use->get()); -+ } -+ -+ for (auto [block, users] : blockToUsers) { -+ OpBuilder builder(block, block->begin()); -+ auto newWait = builder.create(asyncDot->getLoc(), -+ ArrayRef{}, 0); -+ -+ threadValuesThroughWait(newWait, users); -+ } -+ } -+ - // Add the wait right after the last properly-async dot. This only needs to - // wait for all properly-async dots from the i-1'th iteration to complete, IOW - // we wait until there are most `asyncDots.size()` dots in flight. 
-diff --git a/test/TritonGPU/loop-pipeline.mlir b/test/TritonGPU/loop-pipeline.mlir ---- a/test/TritonGPU/loop-pipeline.mlir -+++ b/test/TritonGPU/loop-pipeline.mlir -@@ -349,16 +349,21 @@ tt.func @indirect_bmm_scalar_dist_one(%7 - // CHECK: triton_gpu.async_copy_global_to_local - // CHECK: triton_gpu.async_copy_global_to_local - // CHECK: triton_gpu.async_commit_group -+// CHECK: triton_gpu.async_wait {{.*}} {num = 1 : i32} -+// CHECK: scf.for -+// CHECK: tt.dot - // CHECK: %[[NEXT_BUFFER_1:.*]] = tt.addptr %{{.*}}, {{.*}} - // CHECK: triton_gpu.async_copy_global_to_local %[[NEXT_BUFFER_1]] --// CHECK: %[[IND_BUFFER_0:.*]] = triton_gpu.memdesc_subview --// CHECK: %[[IND_BUFFER_1:.*]] = triton_gpu.local_load %[[IND_BUFFER_0]] -+// CHECK-DAG: %[[IND_BUFFER_WAIT_TOKEN:.*]] = triton_gpu.async_wait {{.*}} {num = 1 : i32} -+// CHECK-DAG: %[[IND_BUFFER_0:.*]] = triton_gpu.memdesc_subview -+// CHECK: %[[IND_BUFFER_1:.*]] = triton_gpu.local_load %[[IND_BUFFER_0]] token %[[IND_BUFFER_WAIT_TOKEN]] - // CHECK: %[[IND_BUFFER_2:.*]] = tt.expand_dims %[[IND_BUFFER_1]] {axis = 1 : i32} - // CHECK: %[[IND_BUFFER_3:.*]] = tt.broadcast %[[IND_BUFFER_2]] - // CHECK: %[[IND_BUFFER_4:.*]] = arith.muli {{.*}}, %[[IND_BUFFER_3]] - // CHECK: %[[NEXT_BUFFER_0:.*]] = tt.addptr {{.*}}, %[[IND_BUFFER_4]] - // CHECK: triton_gpu.async_copy_global_to_local %[[NEXT_BUFFER_0]] - // CHECK: triton_gpu.async_wait {{.*}} {num = 1 : i32} -+// CHECK: scf.yield - tt.func @indirect_bmm_vector(%77: tensor<16x16xi64, #BL> {tt.divisibility=16: i32, tt.constancy=16: i32}, - %76: index, - %49: tensor<16x16x!tt.ptr, #AL> {tt.divisibility=16: i32, tt.contiguity=2 : i32}, -diff --git a/test/TritonGPU/reorder-instructions.mlir b/test/TritonGPU/reorder-instructions.mlir ---- a/test/TritonGPU/reorder-instructions.mlir -+++ b/test/TritonGPU/reorder-instructions.mlir -@@ -28,7 +28,7 @@ module attributes {"triton_gpu.num-warps - // CHECK: triton_gpu.async_wait {num = 0 : i32} - // CHECK: triton_gpu.local_dealloc %0 : !tt.memdesc<4x128x64xf16, #shared> - // CHECK: triton_gpu.local_dealloc %1 : !tt.memdesc<4x128x64xf16, #shared> --// CHECK: %2 = triton_gpu.convert_layout %arg0 : tensor<32x32xf32, #blocked> -> tensor<32x32xf32, #blocked1> -+// CHECK: %3 = triton_gpu.convert_layout %arg0 : tensor<32x32xf32, #blocked> -> tensor<32x32xf32, #blocked1> - #blocked = #triton_gpu.blocked<{sizePerThread = [1, 1], threadsPerWarp = [32, 1], warpsPerCTA = [1, 4], order = [0, 1]}> - #blocked1 = #triton_gpu.blocked<{sizePerThread = [1, 1], threadsPerWarp = [32, 1], warpsPerCTA = [1, 4], order = [1, 0]}> - #shared = #triton_gpu.shared<{vec = 8, perPhase = 1, maxPhase = 4, order = [0, 1]}> -diff --git a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM/WGMMA.cpp b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM/WGMMA.cpp ---- a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM/WGMMA.cpp -+++ b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM/WGMMA.cpp -@@ -333,17 +333,6 @@ static Value faddAccumulate(ConversionPa - return newStruct; - } - --static bool isZero(Value v) { -- auto constantOp = v.getDefiningOp(); -- if (!constantOp) -- return false; -- if (auto denseAttr = dyn_cast(constantOp.getValueAttr())) -- return denseAttr.isSplat() && denseAttr.getSplatValue().isZero(); -- if (auto denseAttr = -- dyn_cast(constantOp.getValueAttr())) -- return denseAttr.isSplat() && denseAttr.getSplatValue().isZero(); -- return false; --} - - static SmallVector emitWait(ConversionPatternRewriter &rewriter, - Location loc, SmallVector acc, -@@ 
-402,7 +391,7 @@ LogicalResult convertDot(const LLVMTypeC - int M = 4 * instrShape[0]; - int N = instrShape[1]; - int K = instrShape[2]; -- bool zeroAcc = isZero(c); -+ bool zeroAcc = isZeroConst(c); - auto shapePerCTATile = getShapePerCTATile(mmaEncoding); - int numRepM = ceil(dShapePerCTA[0], shapePerCTATile[0]); - int numRepN = ceil(dShapePerCTA[1], shapePerCTATile[1]); -diff --git a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/LoadStoreOpToLLVM.cpp b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/LoadStoreOpToLLVM.cpp ---- a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/LoadStoreOpToLLVM.cpp -+++ b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/LoadStoreOpToLLVM.cpp -@@ -924,8 +924,11 @@ struct AsyncWaitOpConversion - auto voidTy = void_ty(ctx); - ptxBuilder.launch(rewriter, loc, voidTy); - -- // Safe to remove the op since it doesn't have any return value. -- rewriter.eraseOp(op); -+ // Drop the result token. -+ Value zero = rewriter.create( -+ op.getLoc(), IntegerType::get(op.getContext(), 32), -+ rewriter.getI32IntegerAttr(0)); -+ rewriter.replaceOp(op, zero); - return success(); - } - }; diff --git a/third_party/triton/temporary/series.bzl b/third_party/triton/temporary/series.bzl index 70313c9b436d2a..214666767949de 100644 --- a/third_party/triton/temporary/series.bzl +++ b/third_party/triton/temporary/series.bzl @@ -6,7 +6,5 @@ internal patch during the next triton integration process. """ temporary_patch_list = [ - "//third_party/triton/temporary:pipelining.patch", - "//third_party/triton/temporary:support_ceil_op.patch", "//third_party/triton/temporary:mma_limit_pred.patch", ] diff --git a/third_party/triton/temporary/support_ceil_op.patch b/third_party/triton/temporary/support_ceil_op.patch deleted file mode 100644 index 71b323d9fccdca..00000000000000 --- a/third_party/triton/temporary/support_ceil_op.patch +++ /dev/null @@ -1,138 +0,0 @@ -Cherry-picking https://github.com/openai/triton/commit/62706e8c518c8c56e56460a43732d8e375217860 -until the next integration lands it. Can be removed as it is already merged. 
- -diff --git a/lib/Conversion/TritonGPUToLLVM/ElementwiseOpToLLVM.cpp b/lib/Conversion/TritonGPUToLLVM/ElementwiseOpToLLVM.cpp ---- a/lib/Conversion/TritonGPUToLLVM/ElementwiseOpToLLVM.cpp -+++ b/lib/Conversion/TritonGPUToLLVM/ElementwiseOpToLLVM.cpp -@@ -805,6 +805,7 @@ void mlir::triton::populateElementwiseOp - POPULATE_UNARY_OP(arith::FPToUIOp, LLVM::FPToUIOp) - POPULATE_UNARY_OP(arith::UIToFPOp, LLVM::UIToFPOp) - POPULATE_UNARY_OP(math::FloorOp, math::FloorOp) -+ POPULATE_UNARY_OP(math::CeilOp, math::CeilOp) - POPULATE_UNARY_OP(math::LogOp, math::LogOp) - POPULATE_UNARY_OP(math::Log2Op, math::Log2Op) - POPULATE_UNARY_OP(math::CosOp, math::CosOp) -diff --git a/lib/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.cpp b/lib/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.cpp ---- a/lib/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.cpp -+++ b/lib/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.cpp -@@ -125,12 +125,13 @@ void populateMathPatternsAndLegality(Tri - MLIRContext *context = patterns.getContext(); - // Rewrite rule - patterns.add, GenericOpPattern, -- GenericOpPattern, GenericOpPattern, -- GenericOpPattern, GenericOpPattern, -- GenericOpPattern, GenericOpPattern, -- GenericOpPattern, GenericOpPattern, -- GenericOpPattern, GenericOpPattern, -- GenericOpPattern>(typeConverter, context); -+ GenericOpPattern, GenericOpPattern, -+ GenericOpPattern, GenericOpPattern, -+ GenericOpPattern, GenericOpPattern, -+ GenericOpPattern, GenericOpPattern, -+ GenericOpPattern, GenericOpPattern, -+ GenericOpPattern, GenericOpPattern>( -+ typeConverter, context); - } - - // -diff --git a/lib/Dialect/TritonNvidiaGPU/Transforms/PlanCTA.cpp b/lib/Dialect/TritonNvidiaGPU/Transforms/PlanCTA.cpp ---- a/lib/Dialect/TritonNvidiaGPU/Transforms/PlanCTA.cpp -+++ b/lib/Dialect/TritonNvidiaGPU/Transforms/PlanCTA.cpp -@@ -651,10 +651,9 @@ bool CTAPlanner::isElementwiseOp(Operati - math::CeilOp, math::CopySignOp, math::CosOp, math::SinOp, - math::CountLeadingZerosOp, math::CountTrailingZerosOp, - math::CtPopOp, math::ErfOp, math::ExpOp, math::Exp2Op, -- math::FloorOp, math::ExpM1Op, math::FloorOp, math::FmaOp, -- math::LogOp, math::Log10Op, math::Log1pOp, math::Log2Op, -- math::PowFOp, math::RsqrtOp, math::SqrtOp, math::RsqrtOp, -- math::TanhOp>(op)) -+ math::FloorOp, math::ExpM1Op, math::FmaOp, math::LogOp, -+ math::Log10Op, math::Log1pOp, math::Log2Op, math::PowFOp, -+ math::RsqrtOp, math::SqrtOp, math::RsqrtOp, math::TanhOp>(op)) - return true; - if (llvm::isa Value { - return self.create(val); - }) -+ .def("create_ceil", -+ [](TritonOpBuilder &self, Value &val) -> Value { -+ return self.create(val); -+ }) - .def("create_exp", - [](TritonOpBuilder &self, Value &val) -> Value { - return self.create(val); -diff --git a/python/test/unit/language/test_core.py b/python/test/unit/language/test_core.py ---- a/python/test/unit/language/test_core.py -+++ b/python/test/unit/language/test_core.py -@@ -915,10 +915,11 @@ def test_unary_op(dtype_x, expr, num_cta - - - @pytest.mark.interpreter --@pytest.mark.parametrize("dtype_x, expr, x", [(dtype_x, expr, x) -- for dtype_x in ["float32", "float64"] -- for expr in ['exp', 'log', 'cos', 'sin', 'exp2', 'log2', 'sqrt', 'floor'] -- for x in ['x', '3.0']]) -+@pytest.mark.parametrize("dtype_x, expr, x", -+ [(dtype_x, expr, x) -+ for dtype_x in ["float32", "float64"] -+ for expr in ['exp', 'log', 'cos', 'sin', 'exp2', 'log2', 'sqrt', 'floor', 'ceil'] -+ for x in ['x', '3.0']]) - def test_math_op(dtype_x, expr, x, device): - _test_unary(dtype_x, f'tl.{expr}({x})', 
f'np.{expr}({x}) ', device=device) - -diff --git a/python/triton/language/__init__.py b/python/triton/language/__init__.py ---- a/python/triton/language/__init__.py -+++ b/python/triton/language/__init__.py -@@ -102,7 +102,8 @@ from .core import ( - void, - where, - ) --from .math import (umulhi, exp, exp2, fma, log, log2, cos, rsqrt, sin, sqrt, sqrt_rn, abs, fdiv, div_rn, erf, floor) -+from .math import (umulhi, exp, exp2, fma, log, log2, cos, rsqrt, sin, sqrt, sqrt_rn, abs, fdiv, div_rn, erf, floor, -+ ceil) - from .random import ( - pair_uniform_to_normal, - philox, -@@ -142,6 +143,7 @@ from .random import ( - "builtin", - "cat", - "cdiv", -+ "ceil", - "clamp", - "const", - "const_pointer_type", -diff --git a/python/triton/language/math.py b/python/triton/language/math.py ---- a/python/triton/language/math.py -+++ b/python/triton/language/math.py -@@ -230,6 +230,15 @@ def floor(x, _builder=None): - - - @core.builtin -+@_check_dtype(dtypes=["fp32", "fp64"]) -+@_add_math_1arg_docstr("ceil") -+@core._tensor_member_fn -+def ceil(x, _builder=None): -+ x = core._to_tensor(x, _builder) -+ return core.tensor(_builder.create_ceil(x.handle), x.type) -+ -+ -+@core.builtin - @_add_math_3arg_docstr("fused multiply-add") - def fma(x, y, z, _builder=None): - x = core._to_tensor(x, _builder) -diff --git a/python/triton/runtime/interpreter.py b/python/triton/runtime/interpreter.py ---- a/python/triton/runtime/interpreter.py -+++ b/python/triton/runtime/interpreter.py -@@ -391,6 +391,7 @@ class InterpreterBuilder: - create_fabs = lambda self, arg: self.unary_op(arg, np.abs) - create_iabs = lambda self, arg: self.unary_op(arg, np.abs) - create_floor = lambda self, arg: self.unary_op(arg, np.floor) -+ create_ceil = lambda self, arg: self.unary_op(arg, np.ceil) - create_log = lambda self, arg: self.unary_op(arg, np.log) - create_log2 = lambda self, arg: self.unary_op(arg, np.log2) - create_precise_sqrt = lambda self, arg: self.unary_op(arg, np.sqrt) diff --git a/third_party/triton/workspace.bzl b/third_party/triton/workspace.bzl index 45daf7974a022e..933b815253ffcd 100644 --- a/third_party/triton/workspace.bzl +++ b/third_party/triton/workspace.bzl @@ -8,8 +8,8 @@ load("//third_party/triton/xla_extensions:series.bzl", "extensions_files_patch_l def repo(): """Imports Triton.""" - TRITON_COMMIT = "cl623533461" - TRITON_SHA256 = "7aa74e82e4417a91fc7a7a84b4f6ad2b7e4e58512758d6c78ca3cd1c8771326b" + TRITON_COMMIT = "cl632952902" + TRITON_SHA256 = "f83c4f197cc2ae3b034070ec8189007451093edc445d1e383eb17a1e3808de9a" tf_http_archive( name = "triton", sha256 = TRITON_SHA256, diff --git a/third_party/triton/xla_extensions/env_vars.patch b/third_party/triton/xla_extensions/env_vars.patch deleted file mode 100644 index 955eb6db8da68e..00000000000000 --- a/third_party/triton/xla_extensions/env_vars.patch +++ /dev/null @@ -1,14 +0,0 @@ -Long standing patch due to licensing issues. 
-diff --git a/include/triton/Tools/Sys/GetEnv.hpp b/include/triton/Tools/Sys/GetEnv.hpp -index 31bc03fe1..a19a432df 100644 ---- a/include/triton/Tools/Sys/GetEnv.hpp -+++ b/include/triton/Tools/Sys/GetEnv.hpp -@@ -34,7 +34,7 @@ inline const std::set ENV_VARS = { - "AMDGCN_ENABLE_DUMP", - "DISABLE_FAST_REDUCTION", - "DISABLE_LLVM_OPT", -- "DISABLE_MMA_V3", -+ "ENABLE_MMA_V3", - "DISABLE_PTXAS_OPT", - "LLVM_IR_ENABLE_DUMP", - "MLIR_ENABLE_DUMP", diff --git a/third_party/triton/xla_extensions/series.bzl b/third_party/triton/xla_extensions/series.bzl index b858da203fb094..af524fb253cbef 100644 --- a/third_party/triton/xla_extensions/series.bzl +++ b/third_party/triton/xla_extensions/series.bzl @@ -4,7 +4,6 @@ applied in the previous copybara workflow. """ extensions_files_patch_list = [ - "//third_party/triton/xla_extensions:env_vars.patch", # File not exported to google "//third_party/triton/xla_extensions:sparse_dot_nvgpu.patch", # Sparsity internal patch "//third_party/triton/xla_extensions:sparse_dot_base.patch", # Sparsity internal patch "//third_party/triton/xla_extensions:sparse_dot_passes.patch", # Sparsity internal patch diff --git a/third_party/triton/xla_extensions/sparse_dot_base.patch b/third_party/triton/xla_extensions/sparse_dot_base.patch index dcacd99740b18f..08b7dd6f7ada87 100644 --- a/third_party/triton/xla_extensions/sparse_dot_base.patch +++ b/third_party/triton/xla_extensions/sparse_dot_base.patch @@ -1,8 +1,9 @@ diff --git a/include/triton/Dialect/TritonGPU/IR/TritonGPUAttrDefs.td b/include/triton/Dialect/TritonGPU/IR/TritonGPUAttrDefs.td +index 56f0b6b49..aa91ea9b8 100644 --- a/include/triton/Dialect/TritonGPU/IR/TritonGPUAttrDefs.td +++ b/include/triton/Dialect/TritonGPU/IR/TritonGPUAttrDefs.td -@@ -1158,4 +1158,12 @@ section 9.7.13.4.1 for more details. - let extraClassDeclaration = extraDistributedDeclaration; +@@ -1262,4 +1262,16 @@ section 9.7.13.4.1 for more details. 
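The rebased hunk below adds SparseDotMetaEncodingAttr, a distributed encoding whose single parameter is the parent MMA layout; everything else about the metadata layout is derived from that parent. A sketch of the usual query pattern, a fragment rather than a full pass, with the angle-bracketed template arguments (stripped in this copy of the patch) reconstructed by inference:

    // Recover the MMA layout behind a sparse metadata tensor `meta`.
    // Type names are taken from the hunks below.
    auto metaTy = mlir::cast<RankedTensorType>(meta.getType());
    auto sparseEnc = mlir::cast<triton::gpu::SparseDotMetaEncodingAttr>(
        metaTy.getEncoding());
    auto mmaLayout =
        mlir::cast<triton::gpu::NvidiaMmaEncodingAttr>(sparseEnc.getParent());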
+ }]; } +def SparseDotMetaEncodingAttr : DistributedEncoding<"SparseDotMetaEncoding", "sparse_dot_meta_encoding"> { @@ -10,14 +11,19 @@ diff --git a/include/triton/Dialect/TritonGPU/IR/TritonGPUAttrDefs.td b/include/ + + let parameters = (ins "Attribute":$parent); + let assemblyFormat = "`<``{` struct(params) `}``>`"; -+ let extraClassDeclaration = extraDistributedDeclaration; ++ let extraClassDeclaration = extraDistributedDeclaration # [{ ++ SmallVector getContigPerThread() { ++ return getSizePerThread(); ++ }; ++ }]; +} + #endif diff --git a/include/triton/Dialect/TritonGPU/IR/TritonGPUOps.td b/include/triton/Dialect/TritonGPU/IR/TritonGPUOps.td +index 4966a5f73..d2bb33cfa 100644 --- a/include/triton/Dialect/TritonGPU/IR/TritonGPUOps.td +++ b/include/triton/Dialect/TritonGPU/IR/TritonGPUOps.td -@@ -7,6 +7,7 @@ include "triton/Dialect/TritonGPU/IR/Tri +@@ -7,6 +7,7 @@ include "triton/Dialect/TritonGPU/IR/TritonGPUAttrDefs.td" include "mlir/Dialect/Arith/IR/ArithBase.td" include "triton/Dialect/Triton/IR/TritonTypes.td" include "triton/Dialect/Triton/IR/TritonAttrDefs.td" @@ -25,8 +31,8 @@ diff --git a/include/triton/Dialect/TritonGPU/IR/TritonGPUOps.td b/include/trito include "mlir/IR/OpBase.td" include "mlir/Interfaces/SideEffectInterfaces.td" // Pure include "mlir/Interfaces/InferTypeOpInterface.td" // SameOperandsAndResultType -@@ -214,4 +215,19 @@ def TTG_LocalLoadOp : TTG_Op<"local_load - let results = (outs TT_Tensor:$result); +@@ -232,4 +233,19 @@ def TTG_LocalStoreOp : TTG_Op<"local_store", [MemoryEffects<[MemWrite shape, return encoding; } @@ -70,7 +77,7 @@ diff --git a/lib/Dialect/TritonGPU/IR/Dialect.cpp b/lib/Dialect/TritonGPU/IR/Dia + +LogicalResult SparseDotOp::verify() { + // Verify operand A. -+ auto aTensorTy = getOperand(0).getType().cast(); ++ auto aTensorTy = cast(getOperand(0).getType()); + auto aElemTy = aTensorTy.getElementType(); + if (!aElemTy.isF16() && !aElemTy.isBF16()) + return emitError("element type of operand A is not supported"); @@ -78,7 +85,7 @@ diff --git a/lib/Dialect/TritonGPU/IR/Dialect.cpp b/lib/Dialect/TritonGPU/IR/Dia + if (aShape.size() != 2) return emitError("shape of operand A is incorrect"); + + // Verify operand B. -+ auto bTensorTy = getOperand(1).getType().cast(); ++ auto bTensorTy = cast(getOperand(1).getType()); + auto bElemTy = bTensorTy.getElementType(); + if (!bElemTy.isF16() && !bElemTy.isBF16()) + return emitError("element type of operand B is not supported"); @@ -86,7 +93,7 @@ diff --git a/lib/Dialect/TritonGPU/IR/Dialect.cpp b/lib/Dialect/TritonGPU/IR/Dia + if (bShape.size() != 2) return emitError("shape of operand B is incorrect"); + + // Verify operand C. -+ auto cTensorTy = getOperand(2).getType().cast(); ++ auto cTensorTy = cast(getOperand(2).getType()); + auto cElemTy = cTensorTy.getElementType(); + if (!cElemTy.isF32()) + return emitError("element type of operand C is not supported"); @@ -101,7 +108,7 @@ diff --git a/lib/Dialect/TritonGPU/IR/Dialect.cpp b/lib/Dialect/TritonGPU/IR/Dia + return emitError("operand element types do not match"); + + // Verify sparse metadata. 
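A note on the contract the metadata checks below enforce: with 2:4 structured sparsity every four logical elements of the operand keep two values, and the selectors pack four 4-bit group masks into each 16-bit word (the "4xi4" comment in SharedToSparseDotOperand.cpp further down). Hence the rank-2 i16 requirement; restated as a standalone predicate, with the shape relation being our inference rather than something the hunk itself checks:

    // Sketch of the validity rule: i16 element type, rank 2; the shape is
    // presumably {M, K/16} relative to the dense K dimension (inference).
    static bool isValidSparseMeta(mlir::RankedTensorType metaTy) {
      return metaTy.getElementType().isInteger(16) &&
             metaTy.getShape().size() == 2;
    }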
-+ auto metaTy = getOperand(3).getType().cast(); ++ auto metaTy = cast(getOperand(3).getType()); + auto metaShape = metaTy.getShape(); + if (!metaTy.getElementType().isInteger(16) || metaShape.size() != 2) + return emitError("sparse metadata tensor is invalid"); @@ -125,7 +132,7 @@ diff --git a/lib/Dialect/TritonGPU/IR/Dialect.cpp b/lib/Dialect/TritonGPU/IR/Dia +//--- SparseDotMetaEncodingAttr --- +unsigned SparseDotMetaEncodingAttr::getTotalElemsPerThread( + ArrayRef shape, Type eltTy) const { -+ auto mmaLayout = getParent().cast(); ++ auto mmaLayout = mlir::cast(getParent()); + return product(shape) / + (mmaLayout.getWarpsPerCTA()[0] * kMetadataElementsPerWarp); +} @@ -169,9 +176,10 @@ diff --git a/lib/Dialect/TritonGPU/IR/Dialect.cpp b/lib/Dialect/TritonGPU/IR/Dia } // namespace triton } // namespace mlir diff --git a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM.cpp b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM.cpp +index f8ece0f1c..435610817 100644 --- a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM.cpp +++ b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM.cpp -@@ -38,6 +38,14 @@ Value convertLayout(int opIdx, Conversio +@@ -43,6 +43,14 @@ Value convertLayout(int opIdx, ConversionPatternRewriter &rewriter, const LLVMTypeConverter *typeConverter, Value thread); } @@ -185,19 +193,19 @@ diff --git a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM. + namespace { - struct LocalLoadOpConversion -@@ -59,6 +67,10 @@ public: - .isa()) { + using namespace mlir; +@@ -67,6 +75,10 @@ public: + cast(dstLayout).getParent())) { return lowerSharedToDotOperand(op, adaptor, getTypeConverter(), rewriter); } -+ if (srcLayout.isa() && -+ dstLayout.isa()) { ++ if (isa(srcLayout) && ++ isa(dstLayout)) { + return lowerSharedToSparseMeta(op, adaptor, getTypeConverter(), rewriter); + } return failure(); } -@@ -130,6 +142,29 @@ private: +@@ -138,6 +150,26 @@ private: rewriter.replaceOp(op, res); return success(); } @@ -208,13 +216,10 @@ diff --git a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM. + const LLVMTypeConverter *typeConverter, + ConversionPatternRewriter &rewriter) const { + auto loc = op.getLoc(); -+ auto sparseEncoding = op.getResult() -+ .getType() -+ .cast() -+ .getEncoding() -+ .cast(); ++ auto sparseEncoding = cast( ++ cast(op.getResult().getType()).getEncoding()); + auto llvmElemTy = typeConverter->convertType( -+ op.getSrc().getType().cast().getElementType()); ++ cast(op.getSrc().getType()).getElementType()); + auto smemObj = getSharedMemoryObjectFromStruct(loc, adaptor.getSrc(), + llvmElemTy, rewriter); + Value res = SharedToSparseDotOperand::convertLayout( @@ -229,6 +234,7 @@ diff --git a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM. struct ConvertLayoutOpOptimizedConversion diff --git a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM/SharedToSparseDotOperand.cpp b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM/SharedToSparseDotOperand.cpp new file mode 100644 +index 000000000..3011cf73d --- /dev/null +++ b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM/SharedToSparseDotOperand.cpp @@ -0,0 +1,69 @@ @@ -255,7 +261,7 @@ new file mode 100644 + Value thread) { + // Calculate tile size as number of mask elements (4xi4). 
+ NvidiaMmaEncodingAttr mmaLayout = -+ sparseEncoding.getParent().cast(); ++ cast(sparseEncoding.getParent()); + SmallVector shapePerCTATile = { + kTileSize * mmaLayout.getWarpsPerCTA()[0], + kTileSize / kMetadataElementsPerPackedValue}; @@ -272,7 +278,7 @@ new file mode 100644 + Value rowId = add(mul(warpGroupId, i32_val(kTileSize)), laneGroupId); + + // Calculate number of tile repetitions. -+ auto shape = tensor.getType().cast().getShape(); ++ auto shape = cast(tensor.getType()).getShape(); + int repM = shape[0] / shapePerCTATile[0]; + int repK = shape[1] / shapePerCTATile[1]; + assert(repM > 0 && repK > 0); @@ -302,9 +308,10 @@ new file mode 100644 +} +} // namespace SharedToSparseDotOperand diff --git a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM.cpp b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM.cpp +index 374b9ec9e..1601806b4 100644 --- a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM.cpp +++ b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM.cpp -@@ -32,6 +32,12 @@ LogicalResult convertAsyncWGMMA(triton:: +@@ -32,6 +32,12 @@ LogicalResult convertAsyncWGMMA(triton::nvidia_gpu::DotAsyncOp op, const LLVMTypeConverter *typeConverter, ConversionPatternRewriter &rewriter, Value thread); @@ -317,7 +324,7 @@ diff --git a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM.cpp b/thir namespace { struct DotOpConversion : public ConvertOpToLLVMPattern { using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; -@@ -180,6 +186,18 @@ struct DotWaitOpConversion +@@ -174,6 +180,18 @@ struct DotWaitOpConversion return success(); } }; @@ -336,7 +343,7 @@ diff --git a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM.cpp b/thir } // namespace void mlir::triton::NVIDIA::populateDotOpToLLVMPatterns( -@@ -188,4 +206,5 @@ void mlir::triton::NVIDIA::populateDotOp +@@ -182,4 +200,5 @@ void mlir::triton::NVIDIA::populateDotOpToLLVMPatterns( patterns.add(typeConverter, benefit); patterns.add(typeConverter, benefit); patterns.add(typeConverter, benefit); @@ -344,6 +351,7 @@ diff --git a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM.cpp b/thir } diff --git a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM/Sparse.cpp b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM/Sparse.cpp new file mode 100644 +index 000000000..34d9212d2 --- /dev/null +++ b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM/Sparse.cpp @@ -0,0 +1,339 @@ @@ -397,15 +405,15 @@ new file mode 100644 + const LLVMTypeConverter *typeConverter, + ConversionPatternRewriter &rewriter) { + // Get number of repetitions across the dimensions. -+ auto aTensorTy = op.getA().getType().cast(); -+ auto bTensorTy = op.getB().getType().cast(); ++ auto aTensorTy = cast(op.getA().getType()); ++ auto bTensorTy = cast(op.getB().getType()); + -+ auto layoutA = aTensorTy.getEncoding().dyn_cast(); -+ auto layoutB = bTensorTy.getEncoding().dyn_cast(); ++ auto layoutA = dyn_cast(aTensorTy.getEncoding()); ++ auto layoutB = dyn_cast(bTensorTy.getEncoding()); + assert(layoutA != nullptr && layoutB != nullptr); + + int bitwidth = aTensorTy.getElementType().getIntOrFloatBitWidth(); -+ auto mmaEnc = layoutA.getParent().cast(); ++ auto mmaEnc = cast(layoutA.getParent()); + auto repA = mmaEnc.getMMAv2Rep(triton::gpu::getShapePerCTA(aTensorTy), + bitwidth, layoutA.getOpIdx()); + auto repB = mmaEnc.getMMAv2Rep(triton::gpu::getShapePerCTA(bTensorTy), @@ -435,7 +443,7 @@ new file mode 100644 + } + + // Flatten accumulator values. 
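Before the repetition loop that follows, a word on the accumulator layout it assumes: the struct handed in through adaptor.getC() is unpacked into a flat vector of f32 values, and each Ampere mma.sp owns four of them per warp-level (m, n) repetition (a 16x8 f32 output tile across 32 lanes is 4 registers per lane). The slice arithmetic as a sketch, with names borrowed from this hunk and the m-major ordering an assumption:

    // Offset of the 4-value accumulator slice consumed and produced by
    // repetition (m, n); fc.size() == 4 * numRepM * numRepN is assumed.
    inline int accSliceBase(int m, int n, int numRepN) {
      return 4 * (m * numRepN + n);
    }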
-+ auto dTensorTy = op.getD().getType().cast(); ++ auto dTensorTy = cast(op.getD().getType()); + auto fc = unpackLLElements(loc, adaptor.getC(), rewriter); + + // Create `mma.sp` instruction for 4/8 core matrices. @@ -512,7 +520,7 @@ new file mode 100644 + Location loc, std::vector instrShape, + bool trans, int dimWpt, Value warpId, MemDescType tensorTy, + Value baseDesc, int minor) { -+ auto sharedLayout = tensorTy.getEncoding().cast(); ++ auto sharedLayout = cast(tensorTy.getEncoding()); + int elemBytes = tensorTy.getElementTypeBitWidth() / 8; + int elemsPerSwizzlingRow = + kMmaLineSize / sharedLayout.getPerPhase() / elemBytes; @@ -541,10 +549,10 @@ new file mode 100644 + ConversionPatternRewriter &rewriter, + Value thread) { + // Get number of repetitions across the dimensions. -+ auto aTensorTy = op.getA().getType().cast(); -+ auto bTensorTy = op.getB().getType().cast(); -+ auto dTensorTy = op.getD().getType().cast(); -+ auto mmaEnc = dTensorTy.getEncoding().cast(); ++ auto aTensorTy = cast(op.getA().getType()); ++ auto bTensorTy = cast(op.getB().getType()); ++ auto dTensorTy = cast(op.getD().getType()); ++ auto mmaEnc = cast(dTensorTy.getEncoding()); + + auto shapePerCTA = getShapePerCTA(dTensorTy); + auto shapePerCTATile = getShapePerCTATile(mmaEnc); @@ -573,7 +581,7 @@ new file mode 100644 + auto sharedObj = getSharedMemoryObjectFromStruct( + loc, arg, typeConverter->convertType(tensorTy.getElementType()), + rewriter); -+ auto sharedLayout = tensorTy.getEncoding().cast(); ++ auto sharedLayout = cast(tensorTy.getEncoding()); + auto shape = getShapePerCTA(tensorTy); + auto ord = sharedLayout.getOrder(); + int byteSize = aTensorTy.getElementTypeBitWidth() / 8; @@ -671,9 +679,9 @@ new file mode 100644 + SparseDotOp::Adaptor adaptor, + const LLVMTypeConverter *typeConverter, + ConversionPatternRewriter &rewriter) { -+ auto resultTy = op.getResult().getType().cast(); ++ auto resultTy = cast(op.getResult().getType()); + NvidiaMmaEncodingAttr mmaLayout = -+ resultTy.getEncoding().cast(); ++ cast(resultTy.getEncoding()); + + if (mmaLayout.isAmpere()) { + return convertSparseMMA(op, adaptor, typeConverter, rewriter); @@ -687,9 +695,10 @@ new file mode 100644 + "Unsupported SparseDotOp found when converting TritonGPU to LLVM."); +} diff --git a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM/WGMMA.cpp b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM/WGMMA.cpp +index 738f0fe04..867939f65 100644 --- a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM/WGMMA.cpp +++ b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM/WGMMA.cpp -@@ -87,8 +87,8 @@ int64_t getSwizzlingFromLayout(const Sha +@@ -88,8 +88,8 @@ int64_t getSwizzlingFromLayout(const SharedEncodingAttr &layout, return swizzlingByteWidth; } diff --git a/third_party/triton/xla_extensions/sparse_dot_fixes_y24w17.patch b/third_party/triton/xla_extensions/sparse_dot_fixes_y24w17.patch index 9d1ae2e91cae3f..ce009aa688e9bf 100644 --- a/third_party/triton/xla_extensions/sparse_dot_fixes_y24w17.patch +++ b/third_party/triton/xla_extensions/sparse_dot_fixes_y24w17.patch @@ -1,30 +1,8 @@ -diff --git a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM/SharedToSparseDotOperand.cpp b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM/SharedToSparseDotOperand.cpp ---- a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM/SharedToSparseDotOperand.cpp -+++ b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM/SharedToSparseDotOperand.cpp -@@ -22,16 +22,16 @@ 
Value convertLayout( - // Calculate tile size as number of mask elements (4xi4). - NvidiaMmaEncodingAttr mmaLayout = - sparseEncoding.getParent().cast(); -+ SmallVector warpsPerCTA = mmaLayout.getWarpsPerCTA(); - SmallVector shapePerCTATile = { -- kTileSize * mmaLayout.getWarpsPerCTA()[0], -- kTileSize / kMetadataElementsPerPackedValue}; -+ kTileSize * warpsPerCTA[0], kTileSize / kMetadataElementsPerPackedValue}; - Value strideM = smemObj.strides[0]; - Value strideK = smemObj.strides[1]; - - // Calculate offset in the tile for the current thread. - Value threadsPerWarp = i32_val(kThreadsPerWarp); - Value warpId = udiv(thread, threadsPerWarp); -- Value warpGroupId = urem(warpId, i32_val(shapePerCTATile[0] / kTileSize)); -+ Value warpGroupId = udiv(warpId, i32_val(warpsPerCTA[1])); - Value laneId = urem(thread, threadsPerWarp); - Value laneGroupId = udiv(laneId, i32_val(kThreadsInGroup)); - Value columnId = urem(laneId, i32_val(shapePerCTATile[1])); diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp +index 0516fc56f..1f27f8a43 100644 --- a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp +++ b/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp -@@ -139,6 +139,7 @@ class BlockedToMMA : public mlir::Rewrit +@@ -142,6 +142,7 @@ class BlockedToMMA : public mlir::RewritePattern { mlir::TypeID::get()); } @@ -32,7 +10,7 @@ diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect // Finds the first different bitwidth in the chain of shape-preserving // unary ops that x depends on. // There are two primary scenarios: -@@ -172,7 +173,6 @@ class BlockedToMMA : public mlir::Rewrit +@@ -175,7 +176,6 @@ class BlockedToMMA : public mlir::RewritePattern { return origBitWidth; } @@ -40,7 +18,7 @@ diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect BlockedToMMA(mlir::MLIRContext *context, int computeCapability) : mlir::RewritePattern(tt::DotOp::getOperationName(), 2, context), computeCapability(computeCapability) {} -@@ -388,18 +388,22 @@ class SparseBlockedToMMA : public mlir:: +@@ -389,18 +389,22 @@ class SparseBlockedToMMA : public mlir::RewritePattern { newRetType, oldAcc); if (versionMajor == 2) { @@ -49,7 +27,7 @@ diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect + int kWidth = 32 / minBitwidth; + // convert A operand - auto oldAType = a.getType().cast(); + auto oldAType = cast(a.getType()); - auto newAEncoding = ttg::DotOperandEncodingAttr::get( - ctx, 0, mmaEnc, oldAType.getElementType()); + auto newAEncoding = @@ -59,7 +37,7 @@ diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect a = rewriter.create(a.getLoc(), newAType, a); // convert B operand - auto oldBType = b.getType().cast(); + auto oldBType = cast(b.getType()); - auto newBEncoding = ttg::DotOperandEncodingAttr::get( - ctx, 1, mmaEnc, oldBType.getElementType()); + auto newBEncoding = @@ -67,3 +45,27 @@ diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect auto newBType = RankedTensorType::get( oldBType.getShape(), oldBType.getElementType(), newBEncoding); b = rewriter.create(b.getLoc(), newBType, b); +diff --git a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM/SharedToSparseDotOperand.cpp b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM/SharedToSparseDotOperand.cpp +index 3011cf73d..ea587dced 100644 +--- 
a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM/SharedToSparseDotOperand.cpp ++++ b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM/SharedToSparseDotOperand.cpp +@@ -22,16 +22,16 @@ Value convertLayout( + // Calculate tile size as number of mask elements (4xi4). + NvidiaMmaEncodingAttr mmaLayout = + cast(sparseEncoding.getParent()); ++ SmallVector warpsPerCTA = mmaLayout.getWarpsPerCTA(); + SmallVector shapePerCTATile = { +- kTileSize * mmaLayout.getWarpsPerCTA()[0], +- kTileSize / kMetadataElementsPerPackedValue}; ++ kTileSize * warpsPerCTA[0], kTileSize / kMetadataElementsPerPackedValue}; + Value strideM = smemObj.strides[0]; + Value strideK = smemObj.strides[1]; + + // Calculate offset in the tile for the current thread. + Value threadsPerWarp = i32_val(kThreadsPerWarp); + Value warpId = udiv(thread, threadsPerWarp); +- Value warpGroupId = urem(warpId, i32_val(shapePerCTATile[0] / kTileSize)); ++ Value warpGroupId = udiv(warpId, i32_val(warpsPerCTA[1])); + Value laneId = urem(thread, threadsPerWarp); + Value laneGroupId = udiv(laneId, i32_val(kThreadsInGroup)); + Value columnId = urem(laneId, i32_val(shapePerCTATile[1])); diff --git a/third_party/triton/xla_extensions/sparse_dot_nvgpu.patch b/third_party/triton/xla_extensions/sparse_dot_nvgpu.patch index ced13cff33fd16..791618363b2f34 100644 --- a/third_party/triton/xla_extensions/sparse_dot_nvgpu.patch +++ b/third_party/triton/xla_extensions/sparse_dot_nvgpu.patch @@ -1,7 +1,8 @@ -diff --git a/include/triton/Dialect/NVGPU/IR/NVGPUOps.td b/include/triton/Dialect/NVGPU/IR/NVGPUOps.td ---- a/include/triton/Dialect/NVGPU/IR/NVGPUOps.td -+++ b/include/triton/Dialect/NVGPU/IR/NVGPUOps.td -@@ -87,6 +87,15 @@ def NVGPU_WGMMAOp : NVGPU_Op<"wgmma", [] +diff --git a/third_party/nvidia/include/Dialect/NVGPU/IR/NVGPUOps.td b/third_party/nvidia/include/Dialect/NVGPU/IR/NVGPUOps.td +index ca9d18873..d39bc6ec4 100644 +--- a/third_party/nvidia/include/Dialect/NVGPU/IR/NVGPUOps.td ++++ b/third_party/nvidia/include/Dialect/NVGPU/IR/NVGPUOps.td +@@ -87,6 +87,15 @@ def NVGPU_WGMMAOp : NVGPU_Op<"wgmma", []> { let assemblyFormat = "$opA `,` $opB (`,` $opC^)? attr-dict `:` functional-type(operands, $res)"; } @@ -18,9 +19,10 @@ diff --git a/include/triton/Dialect/NVGPU/IR/NVGPUOps.td b/include/triton/Dialec let arguments = (ins LLVM_AnyPointer:$addr, I32:$ctaId, I32Attr:$bitwidth, I32Attr:$vec); let builders = [ diff --git a/third_party/nvidia/lib/NVGPUToLLVM/NVGPUToLLVMPass.cpp b/third_party/nvidia/lib/NVGPUToLLVM/NVGPUToLLVMPass.cpp +index e19216520..aacbfb569 100644 --- a/third_party/nvidia/lib/NVGPUToLLVM/NVGPUToLLVMPass.cpp +++ b/third_party/nvidia/lib/NVGPUToLLVM/NVGPUToLLVMPass.cpp -@@ -688,6 +688,84 @@ public: +@@ -668,6 +668,84 @@ public: } }; @@ -31,7 +33,7 @@ diff --git a/third_party/nvidia/lib/NVGPUToLLVM/NVGPUToLLVMPass.cpp b/third_part + using Base::Base; + + std::vector getOutputConstraints(ttn::SparseWGMMAOp op) const { -+ auto outputStructType = op.getType().cast(); ++ auto outputStructType = cast(op.getType()); + uint32_t numOutputRegs = outputStructType.getBody().size(); + std::string output = + outputStructType.getBody().front().isF32() ? "=f" : "=r"; @@ -71,7 +73,7 @@ diff --git a/third_party/nvidia/lib/NVGPUToLLVM/NVGPUToLLVMPass.cpp b/third_part + + // Output and operand C + uint32_t numCRegs = -+ op.getType().cast().getBody().size(); ++ cast(op.getType()).getBody().size(); + args += "{"; + for (uint32_t i = 0; i < numCRegs; ++i) { + args += "$" + std::to_string(asmOpIdx++) + (i == numCRegs - 1 ? 
"" : ","); @@ -105,13 +107,17 @@ diff --git a/third_party/nvidia/lib/NVGPUToLLVM/NVGPUToLLVMPass.cpp b/third_part class ConvertNVGPUToLLVM : public ConvertNVGPUToLLVMBase { public: -@@ -711,7 +789,8 @@ public: +@@ -688,10 +766,9 @@ public: + patterns.add>( + context, Cluster_Cta_Id_Op, Constraints({"=r"}), Constraints()); - patterns.add(context); -+ WGMMAWaitGroupOpPattern, StoreDSmemOpPattern, -+ SparseWGMMAOpPattern>(context); +- patterns +- .add( +- context); ++ patterns.add(context); if (applyPatternsAndFoldGreedily(mod, std::move(patterns)).failed()) signalPassFailure(); diff --git a/third_party/triton/xla_extensions/sparse_dot_passes.patch b/third_party/triton/xla_extensions/sparse_dot_passes.patch index 74662cf3c90dc9..9136cb84b24254 100644 --- a/third_party/triton/xla_extensions/sparse_dot_passes.patch +++ b/third_party/triton/xla_extensions/sparse_dot_passes.patch @@ -1,7 +1,8 @@ diff --git a/lib/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.cpp b/lib/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.cpp +index 4aa2712ec..16a6253d7 100644 --- a/lib/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.cpp +++ b/lib/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.cpp -@@ -277,6 +277,89 @@ struct TritonDotPattern : public OpConve +@@ -279,6 +279,89 @@ struct TritonDotPattern : public OpConversionPattern { } }; @@ -12,7 +13,7 @@ diff --git a/lib/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.cpp b/lib/Co + LogicalResult matchAndRewrite( + triton::gpu::SparseDotOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { -+ RankedTensorType origType = op.getType().cast(); ++ RankedTensorType origType = cast(op.getType()); + auto origShape = origType.getShape(); + auto typeConverter = getTypeConverter(); + int numWarps = typeConverter->getNumWarps(); @@ -40,8 +41,8 @@ diff --git a/lib/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.cpp b/lib/Co + RankedTensorType::get(origShape, origType.getElementType(), dEncoding); + + // a & b must be of smem layout -+ auto aType = adaptor.getA().getType().cast(); -+ auto bType = adaptor.getB().getType().cast(); ++ auto aType = cast(adaptor.getA().getType()); ++ auto bType = cast(adaptor.getB().getType()); + Type aEltType = aType.getElementType(); + Type bEltType = bType.getElementType(); + Attribute aEncoding = aType.getEncoding(); @@ -51,14 +52,14 @@ diff --git a/lib/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.cpp b/lib/Co + Value a = adaptor.getA(); + Value b = adaptor.getB(); + Value c = adaptor.getC(); -+ if (!aEncoding.isa()) { ++ if (!isa(aEncoding)) { + Attribute encoding = triton::gpu::DotOperandEncodingAttr::get( + getContext(), 0, dEncoding, aEltType); + auto dstType = + RankedTensorType::get(aType.getShape(), aEltType, encoding); + a = rewriter.create(a.getLoc(), dstType, a); + } -+ if (!bEncoding.isa()) { ++ if (!isa(bEncoding)) { + Attribute encoding = triton::gpu::DotOperandEncodingAttr::get( + getContext(), 1, dEncoding, bEltType); + auto dstType = @@ -68,11 +69,11 @@ diff --git a/lib/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.cpp b/lib/Co + c = rewriter.create(c.getLoc(), retType, c); + + // aMeta must be of smem layout -+ auto aMetaType = adaptor.getAMeta().getType().cast(); ++ auto aMetaType = cast(adaptor.getAMeta().getType()); + Attribute aMetaEncoding = aMetaType.getEncoding(); + if (!aMetaEncoding) return failure(); + Value aMeta = adaptor.getAMeta(); -+ if (!aMetaEncoding.isa()) { ++ if (!isa(aMetaEncoding)) { + Attribute encoding = + 
triton::gpu::SparseDotMetaEncodingAttr::get(getContext(), dEncoding); + auto dstType = RankedTensorType::get( @@ -91,17 +92,17 @@ diff --git a/lib/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.cpp b/lib/Co struct TritonCatPattern : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; -@@ -550,6 +633,7 @@ void populateTritonPatterns(TritonGPUTyp - GenericOpPattern, GenericOpPattern, +@@ -553,6 +636,7 @@ void populateTritonPatterns(TritonGPUTypeConverter &typeConverter, + GenericOpPattern, GenericOpPattern, TritonFuncOpPattern>(typeConverter, context); + patterns.insert(typeConverter, context); } // -@@ -788,6 +872,12 @@ public: - IntegerAttr::get( - i32_ty, llvm::APInt(32, computeCapability.getValue()))); +@@ -794,6 +878,12 @@ public: + mod->setAttr(AttrTargetName, + StringAttr::get(context, this->target.getValue())); + // Only transform sparse dot op with undefined layout. + target.addDynamicallyLegalOp( @@ -113,9 +114,10 @@ diff --git a/lib/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.cpp b/lib/Co return signalPassFailure(); diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp +index 098ee85e4..0516fc56f 100644 --- a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp +++ b/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp -@@ -42,8 +42,9 @@ static int getMMAVersionSafe(int compute +@@ -44,8 +44,9 @@ static int getMMAVersionSafe(int computeCapability, tt::DotOp op) { return 0; } @@ -126,7 +128,7 @@ diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect auto rank = shape.size(); // Early exit for batched matmul if (rank == 3) -@@ -56,14 +57,14 @@ warpsPerTileV2(tt::DotOp dotOp, const Ar +@@ -58,8 +59,8 @@ warpsPerTileV2(tt::DotOp dotOp, const ArrayRef shape, int numWarps) { auto slices = multiRootGetSlice(dotOp, {filter}, {filter}); bool hasChainedDot = false; for (Operation *op : slices) { @@ -137,14 +139,7 @@ diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect auto resTy = chainedDot.getResult().getType(); if (resTy.getRank() != rank) { continue; - } - if (auto mmaEncoding = -- resTy.getEncoding().dyn_cast()) { -+ resTy.getEncoding().template dyn_cast()) { - return ttg::getWarpsPerCTA(mmaEncoding); - } - hasChainedDot = true; -@@ -101,12 +102,13 @@ warpsPerTileV2(tt::DotOp dotOp, const Ar +@@ -103,12 +104,13 @@ warpsPerTileV2(tt::DotOp dotOp, const ArrayRef shape, int numWarps) { return ret; } @@ -162,7 +157,7 @@ diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect slices.end()) return {(unsigned)numWarps, 1}; -@@ -175,9 +177,10 @@ public: +@@ -178,9 +180,10 @@ public: : mlir::RewritePattern(tt::DotOp::getOperationName(), 2, context), computeCapability(computeCapability) {} @@ -176,7 +171,7 @@ diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect switch (version) { case 2: return warpsPerTileV2(dotOp, shape, numWarps); -@@ -337,6 +340,98 @@ public: +@@ -335,6 +338,98 @@ public: return success(); } }; @@ -201,7 +196,7 @@ diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect + // Check data-types and SM compatibility + RankedTensorType oldRetType = dotOp.getType(); + if (!oldRetType.getEncoding() || -+ oldRetType.getEncoding().isa()) ++ isa(oldRetType.getEncoding())) + return failure(); + + assert(computeCapability >= 80 && @@ -216,7 +211,7 @@ diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect + + auto instrShape = + 
mmaVersionToInstrShape(versionMajor, retShapePerCTA, -+ a.getType().cast(), numWarps); ++ cast(a.getType()), numWarps); + auto warpsPerTile = BlockedToMMA::getWarpsPerTile( + dotOp, retShapePerCTA, versionMajor, numWarps, instrShape); + ttg::NvidiaMmaEncodingAttr mmaEnc = @@ -232,7 +227,7 @@ diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect + + if (versionMajor == 2) { + // convert A operand -+ auto oldAType = a.getType().cast(); ++ auto oldAType = cast(a.getType()); + auto newAEncoding = ttg::DotOperandEncodingAttr::get( + ctx, 0, mmaEnc, oldAType.getElementType()); + auto newAType = RankedTensorType::get( @@ -240,7 +235,7 @@ diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect + a = rewriter.create(a.getLoc(), newAType, a); + + // convert B operand -+ auto oldBType = b.getType().cast(); ++ auto oldBType = cast(b.getType()); + auto newBEncoding = ttg::DotOperandEncodingAttr::get( + ctx, 1, mmaEnc, oldBType.getElementType()); + auto newBType = RankedTensorType::get( @@ -253,7 +248,7 @@ diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect + + // convert metadata + Value meta = dotOp.getAMeta(); -+ auto oldMetaType = meta.getType().cast(); ++ auto oldMetaType = cast(meta.getType()); + auto newMetaType = RankedTensorType::get( + oldMetaType.getShape(), oldMetaType.getElementType(), + SparseDotMetaEncodingAttr::get(ctx, mmaEnc)); @@ -275,7 +270,7 @@ diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect } // namespace static Value promoteOperand(OpBuilder &builder, Location loc, Value operand, -@@ -397,6 +491,7 @@ public: +@@ -394,6 +489,7 @@ public: mlir::RewritePatternSet patterns(context); patterns.add<::BlockedToMMA>(context, computeCapability); @@ -284,33 +279,31 @@ diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect signalPassFailure(); } diff --git a/lib/Dialect/TritonGPU/Transforms/Pipeliner/MatmulLoopPipeline.cpp b/lib/Dialect/TritonGPU/Transforms/Pipeliner/MatmulLoopPipeline.cpp +index 97ca6a840..f0ef124ff 100644 --- a/lib/Dialect/TritonGPU/Transforms/Pipeliner/MatmulLoopPipeline.cpp +++ b/lib/Dialect/TritonGPU/Transforms/Pipeliner/MatmulLoopPipeline.cpp -@@ -47,6 +47,10 @@ struct PipelinedOpInfo { - bool loadIsMMAV3 = false; +@@ -188,6 +188,10 @@ public: + } }; -+bool isDotOp(Operation* op) { ++static bool isDotOp(Operation* op) { + return isa(op); +} + - } // namespace - static bool isMMAv3Dot(Operation *op) { -@@ -165,22 +169,28 @@ getSharedEncIfAllUsersAreDotEnc(Value val) { + auto dot = dyn_cast(op); + if (!dot) +@@ -399,19 +403,28 @@ getSharedEncIfAllUsersAreDotEnc(Value val) { } else { if (!isa(user)) return std::nullopt; -- auto dotOpEnc = user->getResult(0) -- .getType() -- .cast() -- .getEncoding() -- .dyn_cast(); +- auto dotOpEnc = dyn_cast( +- cast(user->getResult(0).getType()).getEncoding()); - if (!dotOpEnc) + auto enc = -+ user->getResult(0).getType().cast().getEncoding(); ++ cast(user->getResult(0).getType()).getEncoding(); + if (isa(enc)) { -+ auto srcTy = val.getType().cast(); ++ auto srcTy = cast(val.getType()); + auto CTALayout = ttg::getCTALayout(srcTy.getEncoding()); + auto order = ttg::getOrder(srcTy.getEncoding()); + unsigned bitWidth = srcTy.getElementType().getIntOrFloatBitWidth(); @@ -321,14 +314,14 @@ diff --git a/lib/Dialect/TritonGPU/Transforms/Pipeliner/MatmulLoopPipeline.cpp b + srcTy.getElementType().getIntOrFloatBitWidth(), + /*needTrans=*/false); + } else if (isa(enc)) { -+ auto srcTy = val.getType().cast(); 
++ auto srcTy = cast(val.getType()); + tempAttr = ttg::SharedEncodingAttr::get( + val.getContext(), /*vec=*/1, /*perPhase=*/1, /*maxPhase=*/1, + ttg::getOrder(srcTy.getEncoding()), + ttg::getCTALayout(srcTy.getEncoding())); + } else { return std::nullopt; -- auto srcTy = val.getType().cast(); +- auto srcTy = cast(val.getType()); - auto CTALayout = ttg::getCTALayout(srcTy.getEncoding()); - auto order = ttg::getOrder(srcTy.getEncoding()); - unsigned bitWidth = srcTy.getElementType().getIntOrFloatBitWidth(); @@ -341,71 +334,63 @@ diff --git a/lib/Dialect/TritonGPU/Transforms/Pipeliner/MatmulLoopPipeline.cpp b } // Check that the shared encodings needed by the users are compatible. if (!tempAttr || (attr != nullptr && attr != tempAttr)) -@@ -313,7 +323,7 @@ loadOpsToDistanceAndUse(scf::ForOp forOp) { +@@ -518,7 +531,7 @@ loadOpsToIndirectionLevelAndUse(scf::ForOp forOp) { }; for (Operation &op : forOp.getBody()->without_terminator()) { - if (!isa(op)) + if (!isDotOp(&op)) continue; + seen.clear(); dfs(&op, 0, &op); - } -@@ -391,7 +401,8 @@ collectOpsToPipeline(scf::ForOp forOp, - // loads. - for (auto &[loadOp, distAndUse] : loadOpToDistAndUse) { - PipelinedOpInfo loadInfo; -- if (auto dot = dyn_cast(distAndUse.second)) { -+ if (isDotOp(distAndUse.second)) { -+ auto dot = dyn_cast(distAndUse.second); - if (loadIsMMAv3(loadOp)) { +@@ -595,7 +608,8 @@ assignMemoryLayouts(llvm::SmallVector> + continue; + } + +- if (auto dot = dyn_cast(use)) { ++ if (isDotOp(use)) { ++ auto dot = dyn_cast(use); + loadInfo.usedByDot = true; + if (loadIsMMAv3(op)) { loadInfo.loadIsMMAV3 = true; - loadInfo.sharedEncoding = -@@ -410,7 +421,7 @@ collectOpsToPipeline(scf::ForOp forOp, +@@ -614,7 +628,7 @@ assignMemoryLayouts(llvm::SmallVector> // The codegen bug is caught by an assertion, so if you think you've // fixed it, feel free to delete this code and see if the assert still // fails. :) - if (!loadInfo.sharedEncoding) { + if (dot && !loadInfo.sharedEncoding) { - if (auto dotEnc = dot.getResult() - .getType() - .getEncoding() -@@ -788,7 +799,7 @@ bool mlir::triton::preProcessLoopAndGetSchedule( - int useStage = opToInfo[info.use].stage; - int numBuffers = useStage - defStage; - -- if (hasMMAV3 && isa(info.use)) { -+ if (hasMMAV3 && isDotOp(info.use)) { - // For MMAv3, we need an extra buffer as this is assumed in the wgmma - // pipelining post-processing. 
- numBuffers++; + if (auto dotEnc = dyn_cast( + dot.getResult().getType().getEncoding())) { + auto loadTy = cast(op->getResultTypes()[0]); diff --git a/lib/Dialect/TritonGPU/Transforms/ReduceDataDuplication.cpp b/lib/Dialect/TritonGPU/Transforms/ReduceDataDuplication.cpp +index 2211df31b..ee5ff44d8 100644 --- a/lib/Dialect/TritonGPU/Transforms/ReduceDataDuplication.cpp +++ b/lib/Dialect/TritonGPU/Transforms/ReduceDataDuplication.cpp -@@ -36,6 +36,10 @@ public: +@@ -37,6 +37,10 @@ public: auto srcEncoding = srcType.getEncoding(); - if (srcEncoding.isa()) + if (isa(srcEncoding)) return; -+ if (dstType.getEncoding().isa()) { ++ if (isa(dstType.getEncoding())) { + replaceSparseMetaEncoding(cvtOp); + return; + } auto dstDotOp = - dstType.getEncoding().dyn_cast(); + dyn_cast(dstType.getEncoding()); if (!dstDotOp) -@@ -74,6 +78,27 @@ public: +@@ -83,6 +87,27 @@ public: cvtOp.erase(); }); } + + private: + void replaceSparseMetaEncoding(triton::gpu::ConvertLayoutOp cvtOp) { -+ auto srcType = cvtOp.getOperand().getType().cast(); ++ auto srcType = cast(cvtOp.getOperand().getType()); + auto srcEncoding = srcType.getEncoding(); + auto sharedLayout = triton::gpu::SharedEncodingAttr::get( + cvtOp.getContext(), 8, 1, 1, triton::gpu::getOrder(srcEncoding), + triton::gpu::getCTALayout(srcEncoding)); + -+ auto dstType = cvtOp.getType().cast(); ++ auto dstType = cast(cvtOp.getType()); + auto tmpType = triton::MemDescType::get( + dstType.getShape(), dstType.getElementType(), sharedLayout); + @@ -421,6 +406,7 @@ diff --git a/lib/Dialect/TritonGPU/Transforms/ReduceDataDuplication.cpp b/lib/Di std::unique_ptr mlir::triton::gpu::createReduceDataDuplicationPass() { diff --git a/lib/Dialect/TritonNvidiaGPU/Transforms/FenceInsertion.cpp b/lib/Dialect/TritonNvidiaGPU/Transforms/FenceInsertion.cpp +index f456d36a6..a1dac2b72 100644 --- a/lib/Dialect/TritonNvidiaGPU/Transforms/FenceInsertion.cpp +++ b/lib/Dialect/TritonNvidiaGPU/Transforms/FenceInsertion.cpp @@ -45,7 +45,7 @@ public: @@ -432,7 +418,7 @@ diff --git a/lib/Dialect/TritonNvidiaGPU/Transforms/FenceInsertion.cpp b/lib/Dia return WalkResult::advance(); OpBuilder builder(op); auto a = op->getOperand(0); -@@ -83,7 +83,7 @@ private: +@@ -80,7 +80,7 @@ private: static DenseSet> trace; auto op = operand.getDefiningOp(); // avoid redundant insertion diff --git a/third_party/xla/third_party/triton/temporary/pipelining.patch b/third_party/xla/third_party/triton/temporary/pipelining.patch deleted file mode 100644 index 9f5f36aeb5099d..00000000000000 --- a/third_party/xla/third_party/triton/temporary/pipelining.patch +++ /dev/null @@ -1,472 +0,0 @@ -This is patching changes upstream from different PRs that fix issues with -pipelining internally. Required changes are upto and including this commit -https://github.com/openai/triton/commit/70f0b7b6e333fe2155c79dfa8bec6ad388073670 -The patch can be removed with the integration that includes these changes. 
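The heart of what was upstreamed: triton_gpu.async_wait grew a result token, and triton_gpu.local_load grew an optional token operand, so the wait-to-load ordering became an explicit SSA dependence rather than an implicit positional one. Reconstructed from the deleted hunks below (op class names inferred from the ttg dialect; a fragment, not a full lowering):

    // Inside createAsyncCopy: commit the async copy, wait on it, and thread
    // the wait's token into the shared-memory load that replaces the tt.load.
    auto commit =
        builder.create<ttg::AsyncCommitGroupOp>(loc, copy->getResult(0));
    auto wait =
        builder.create<ttg::AsyncWaitOp>(loc, commit->getResult(0), 0);
    auto sharedLoad = builder.create<ttg::LocalLoadOp>(
        loc, loadOp.getType(), viewLoad, wait->getResult(0) /*token*/);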
- -diff --git a/include/triton/Analysis/Utility.h b/include/triton/Analysis/Utility.h ---- a/include/triton/Analysis/Utility.h -+++ b/include/triton/Analysis/Utility.h -@@ -8,6 +8,18 @@ - - namespace mlir { - -+inline bool isZeroConst(Value v) { -+ auto constantOp = v.getDefiningOp(); -+ if (!constantOp) -+ return false; -+ if (auto denseAttr = dyn_cast(constantOp.getValueAttr())) -+ return denseAttr.isSplat() && denseAttr.getSplatValue().isZero(); -+ if (auto denseAttr = -+ dyn_cast(constantOp.getValueAttr())) -+ return denseAttr.isSplat() && denseAttr.getSplatValue().isZero(); -+ return false; -+} -+ - class ReduceOpHelper { - public: - explicit ReduceOpHelper(triton::ReduceOp op) -diff --git a/include/triton/Dialect/TritonGPU/IR/TritonGPUOps.td b/include/triton/Dialect/TritonGPU/IR/TritonGPUOps.td ---- a/include/triton/Dialect/TritonGPU/IR/TritonGPUOps.td -+++ b/include/triton/Dialect/TritonGPU/IR/TritonGPUOps.td -@@ -45,6 +45,8 @@ def TTG_AsyncWaitOp : TTG_Op<"async_wait - - let arguments = (ins Variadic:$asyncToken, I32Attr:$num); - -+ let results = (outs TTG_AsyncToken:$retToken); -+ - let assemblyFormat = "$asyncToken attr-dict"; - - let extraClassDeclaration = [{ -@@ -229,10 +231,16 @@ def TTG_LocalLoadOp : TTG_Op<"local_load - let description = [{ - Load a tensor from the local memory descriptor into a distributed tensor. - }]; -- let arguments = (ins TT_MemDescType:$src); -+ let arguments = (ins TT_MemDescType:$src, Optional :$token); -+ -+ let builders = [ -+ OpBuilder<(ins "Type":$retType, "Value":$src), -+ [{ -+ build($_builder, $_state, retType, src, /*token=*/static_cast(nullptr)); -+ }]>]; - - // Use qualified() otherwise "!tt.memdesc" is printed as "". -- let assemblyFormat = [{$src attr-dict `:` qualified(type($src)) `->` type($result)}]; -+ let assemblyFormat = [{$src (`token` $token^)? attr-dict `:` qualified(type($src)) `->` type($result)}]; - - let results = (outs TT_Tensor:$result); - } -diff --git a/lib/Dialect/TritonGPU/Transforms/Pipeliner/MatmulLoopPipeline.cpp b/lib/Dialect/TritonGPU/Transforms/Pipeliner/MatmulLoopPipeline.cpp ---- a/lib/Dialect/TritonGPU/Transforms/Pipeliner/MatmulLoopPipeline.cpp -+++ b/lib/Dialect/TritonGPU/Transforms/Pipeliner/MatmulLoopPipeline.cpp -@@ -8,6 +8,7 @@ - #include "mlir/Interfaces/SideEffectInterfaces.h" - #include "mlir/Support/LLVM.h" - #include "triton/Analysis/AxisInfo.h" -+#include "triton/Analysis/Utility.h" - #include "triton/Dialect/Triton/IR/Types.h" - #include "triton/Dialect/Triton/IR/Utility.h" - #include "triton/Dialect/TritonGPU/IR/Attributes.h" -@@ -84,12 +85,13 @@ createAsyncCopy(scf::ForOp &forOp, tt::L - Location loc = loadOp.getLoc(); - Value src = loadOp.getPtr(); - Value mask = loadOp.getMask(); -+ Value other = loadOp.getOther(); - if (!isExpensiveLoadOrStore(loadOp) && opToInfo[loadOp].blockedEncoding) { - // For inexpensive loads that do not directly feed into dot ops - // we want to use optimal layout for the data. 
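The hunk continues below with the layout conversion that comment describes; the fix it carries is easy to miss in the diff noise: the load's `other` operand (the padding value) now goes through the same blocked-layout conversion as the pointer and mask. Condensed, using the names from the hunk:

    // Gist of the fix: convert src, mask, and (newly) the `other` padding
    // value to the chosen blocked encoding.
    src = convertBlockLayout(src);
    if (mask)
      mask = convertBlockLayout(mask);
    if (other)
      other = convertBlockLayout(other);  // the line this patch adds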
- ttg::BlockedEncodingAttr encoding = opToInfo[loadOp].blockedEncoding; - auto convertBlockLayout = [&](Value src) { -- auto ty = src.getType().cast(); -+ auto ty = cast(src.getType()); - auto newTy = - RankedTensorType::get(ty.getShape(), ty.getElementType(), encoding); - auto cvt = -@@ -99,9 +101,11 @@ createAsyncCopy(scf::ForOp &forOp, tt::L - src = convertBlockLayout(src); - if (mask) - mask = convertBlockLayout(mask); -+ if (other) -+ other = convertBlockLayout(other); - } - -- tt::MemDescType allocTy = alloc.getType().cast(); -+ tt::MemDescType allocTy = cast(alloc.getType()); - SmallVector copyOffsets(allocTy.getRank(), zero); - copyOffsets[0] = insertIdx; - tt::MemDescType subviewTy = tt::MemDescType::get( -@@ -110,11 +114,12 @@ createAsyncCopy(scf::ForOp &forOp, tt::L - auto view = - builder.create(loc, subviewTy, alloc, copyOffsets); - Operation *copy = builder.create( -- loc, src, view, mask, loadOp.getOther(), loadOp.getCache(), -- loadOp.getEvict(), loadOp.getIsVolatile()); -+ loc, src, view, mask, other, loadOp.getCache(), loadOp.getEvict(), -+ loadOp.getIsVolatile()); - Operation *commmit = - builder.create(loc, copy->getResult(0)); -- builder.create(loc, commmit->getResult(0), 0); -+ Operation *wait = -+ builder.create(loc, commmit->getResult(0), 0); - - int stage = opToInfo[loadOp].stage; - bool isMMV3Load = opToInfo[loadOp].loadIsMMAV3; -@@ -142,9 +147,21 @@ createAsyncCopy(scf::ForOp &forOp, tt::L - for (auto alloc : allocsToErase) { - alloc.erase(); - } -- auto sharedLoad = -- builder.create(loc, loadOp.getType(), viewLoad); -- loadOp->replaceAllUsesWith(sharedLoad->getResults()); -+ -+ auto sharedLoad = builder.create( -+ loc, loadOp.getType(), viewLoad, wait->getResult(0)); -+ auto result = sharedLoad->getResults(); -+ -+ // Create a select for non-zero other values as they are not handled by -+ // AsyncCopyGlobalToLocalOp for now. -+ Value other = loadOp.getOther(); -+ if (other && !isZeroConst(other)) { -+ auto select = builder.create( -+ loc, loadOp.getType(), mask, sharedLoad.getResult(), other); -+ result = select->getResults(); -+ } -+ -+ loadOp->replaceAllUsesWith(result); - } - loadOp.erase(); - } -@@ -160,7 +177,7 @@ getSharedEncIfAllUsersAreDotEnc(Value va - if (user->getNumResults() != 1) - return std::nullopt; - if (auto memDesc = -- user->getResult(0).getType().dyn_cast()) { -+ dyn_cast(user->getResult(0).getType())) { - // First time we find a shared encoding in the chain, save it and try to - // use it if it is compatible with the other users. 
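That compatibility rule is the crux of getSharedEncIfAllUsersAreDotEnc: every dot-encoded user must agree on a single shared encoding, or the load is not pipelined through shared memory. As a standalone fold, where sharedEncodingFor is a hypothetical helper standing in for the per-user logic in the hunk:

    // Sketch: all users must yield the same shared encoding, else bail.
    std::optional<ttg::SharedEncodingAttr> agreed;
    for (mlir::Operation *user : value.getUsers()) {
      ttg::SharedEncodingAttr enc = sharedEncodingFor(user);  // hypothetical
      if (!enc || (agreed && *agreed != enc))
        return std::nullopt;
      agreed = enc;
    }
    return agreed;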
- tempAttr = memDesc.getEncoding().cast(); -@@ -203,7 +220,7 @@ getSharedEncIfAllUsersAreDotEnc(Value va - static ttg::BlockedEncodingAttr - getBlockedEncoding(tt::LoadOp loadOp, tt::ModuleAxisInfoAnalysis &axisInfo) { - Value src = loadOp.getPtr(); -- auto ty = src.getType().cast(); -+ auto ty = cast(src.getType()); - auto mod = loadOp->getParentOfType(); - int numWarps = ttg::TritonGPUDialect::getNumWarps(mod); - int threadsPerWarp = ttg::TritonGPUDialect::getThreadsPerWarp(mod); -@@ -221,7 +238,7 @@ getBlockedEncoding(tt::LoadOp loadOp, tt - - static std::optional - getSharedEncoding(tt::LoadOp loadOp, bool isMMAV3) { -- auto ty = loadOp.getType().cast(); -+ auto ty = cast(loadOp.getType()); - auto ctaLayout = ttg::getCTALayout(ty.getEncoding()); - auto blockedOrder = ttg::getOrder(ty.getEncoding()); - SmallVector order; -@@ -285,11 +302,10 @@ loadOpsToDistanceAndUse(scf::ForOp forOp - if (auto mask = loadOp.getMask()) - vec = std::min(vec, axisInfoAnalysis.getMaskAlignment(mask)); - -- auto tensorTy = ptr.getType().dyn_cast(); -+ auto tensorTy = dyn_cast(ptr.getType()); - if (!tensorTy) - return false; -- auto ty = -- tensorTy.getElementType().cast().getPointeeType(); -+ auto ty = cast(tensorTy.getElementType()).getPointeeType(); - unsigned width = vec * ty.getIntOrFloatBitWidth(); - - // We do not pipeline all loads for the following reasons: -@@ -353,7 +369,7 @@ static bool loadIsMMAv3(tt::LoadOp loadO - - // MMA V3 case. - auto newOrder = sharedEnc.getOrder(); -- auto ty = loadOp.getType().cast(); -+ auto ty = cast(loadOp.getType()); - auto oldOrder = ttg::getOrder(ty.getEncoding()); - - // The operand of MMAv3 is in SharedEncoding and its order should not -@@ -497,7 +513,7 @@ collectOpsToPipeline(scf::ForOp forOp, - static Value createAlloc(scf::ForOp &forOp, tt::LoadOp loadOp, - ttg::SharedEncodingAttr sharedEnc, unsigned distance) { - OpBuilder builder(forOp); -- auto ty = loadOp.getType().cast(); -+ auto ty = cast(loadOp.getType()); - SmallVector bufferShape(ty.getShape().begin(), ty.getShape().end()); - bufferShape.insert(bufferShape.begin(), distance); - Type memdescType = mlir::triton::MemDescType::get( -@@ -669,12 +685,23 @@ createSchedule(scf::ForOp forOp, int num - } - }); - -+ auto getNestedOperands = [](Operation *op) -> SmallVector { -+ SmallVector operands; -+ op->walk([&](Operation *nestedOp) { -+ for (Value operand : nestedOp->getOperands()) { -+ if (operand.getParentBlock()->getParentOp()->isAncestor(nestedOp)) -+ operands.push_back(operand); -+ } -+ }); -+ return operands; -+ }; -+ - // Find dependencies with distance of 1. - SmallVector> distanceOneUsers(numStages); - for (int stage = 0; stage < numStages - 1; stage++) { - auto &group = insertAndDeps[stage]; - for (Operation *op : group) { -- for (Value operand : op->getOperands()) { -+ for (Value operand : getNestedOperands(op)) { - if (auto arg = operand.dyn_cast()) { - if (arg.getArgNumber() > 0 && arg.getOwner() == op->getBlock()) { - auto yieldOp = op->getBlock()->getTerminator(); -@@ -905,7 +932,7 @@ static int minNumInterleavedCommitOps(Op - // Look for consecutive wait ops and combine them into a single wait op. 
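The function that follows is where the new token plumbing forced a second fix: folded-away waits used to be erasable outright, but their result tokens may now have uses, so the collection switched from a set to an old-to-new map and every use is redirected before erasure. The replacement loop, condensed from the hunk:

    // After combining: toDelete maps each redundant wait to its replacement.
    llvm::MapVector<mlir::Operation *, mlir::Operation *> toDelete;
    // ... populated while merging runs of consecutive waits ...
    for (auto &entry : toDelete) {
      entry.first->replaceAllUsesWith(entry.second);  // redirect token uses
      entry.first->erase();
    }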
- static void - combineRedundantWaitOps(llvm::SmallSetVector &waitOps) { -- llvm::SmallSetVector toDelete; -+ llvm::MapVector toDelete; - for (auto waitOp : waitOps) { - if (toDelete.count(waitOp)) - continue; -@@ -927,10 +954,13 @@ combineRedundantWaitOps(llvm::SmallSetVe - OpBuilder builder(waitGroup.back()); - auto newWaitOp = builder.create(waitOp.getLoc(), - depTokens, minWaitNumber); -- toDelete.insert(waitGroup.begin(), waitGroup.end()); -+ for (auto waitOp : waitGroup) { -+ toDelete[waitOp] = newWaitOp; -+ } - } - for (auto waitOp : toDelete) { -- waitOp->erase(); -+ waitOp.first->replaceAllUsesWith(waitOp.second); -+ waitOp.first->erase(); - } - } - -@@ -1010,7 +1040,7 @@ static void threadValuesThroughWait(ttng - - for (ttng::DotAsyncOp dot : asyncDots) { - for (Value operand : dot.getOperands()) { -- if (operand.getType().isa()) { -+ if (isa(operand.getType())) { - newOperands.insert(operand); - } - } -@@ -1020,15 +1050,21 @@ static void threadValuesThroughWait(ttng - // values in the operation. - auto newWait = builder.create( - wait.getLoc(), llvm::to_vector(newOperands), wait.getPendings()); -+ -+ auto dominatedByNewWait = [&](OpOperand &operand) { -+ auto opInThisBlock = -+ newWait->getBlock()->findAncestorOpInBlock(*operand.getOwner()); -+ return opInThisBlock && newWait->isBeforeInBlock(opInThisBlock); -+ }; - for (int i = 0; i < origNumOperands; i++) { - Value operand = wait.getResult(i); -- if (!operand.getType().isa()) -+ if (!isa(operand.getType())) - operand.replaceAllUsesWith(newWait.getResult(i)); - } - for (int i = origNumOperands; i < newOperands.size(); i++) { - Value operand = newWait.getOperand(i); -- if (!operand.getType().isa()) -- operand.replaceAllUsesExcept(newWait.getResult(i), newWait); -+ if (!isa(operand.getType())) -+ operand.replaceUsesWithIf(newWait.getResult(i), dominatedByNewWait); - } - wait->erase(); - } -@@ -1047,8 +1083,8 @@ static void threadValuesThroughWait(ttng - // 1. All operands that touch shared memory are multi-buffered, i.e. can't read - // an incomplete value while it's being written asynchronously by a load. - // --// 2. During iteration i, nothing other than the loop's `yield` reads the --// result of the dot. -+// 2. If the dot is used by any op in the loop, it must be used under an `if`, -+// and will be synced with a `wait 0` at the beginning of the `if` block. - // - // 3. During iteration i, between the start of the loop up until the first - // `ttng.dot_wait {pendings=0}` op, the result of the dot from iteration i-1 -@@ -1079,7 +1115,7 @@ static std::optional dotCanBeProper - // Rule 1: All shmem operands are multi-buffered. - auto checkOperand = [&](Value operand) { - if (!isa( -- operand.getType().cast().getEncoding())) { -+ cast(operand.getType()).getEncoding())) { - return true; - } - -@@ -1103,17 +1139,41 @@ static std::optional dotCanBeProper - return std::nullopt; - } - -- // Rule 2: The dot should only be used by the for loop's `yield`. -- if (!dotOp->hasOneUse() || -- *dotOp->getUsers().begin() != forOp.getBody()->getTerminator()) { -- LDBG("Can't make dot async because it is not used only by the loop's " -- "`yield`."); -- return std::nullopt; -+ // Rule 2: The dot cannot be unconditionally used by any op in the loop. -+ // Uses under `if` are allowed, as can be explicitly synced with a `wait 0`. 
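The worklist walk below enforces that rule, and its shape is simple even where the diff is dense: accept a use only if it is the loop's own yield (recording the carried position) or a yield feeding an scf.if result, which is then followed outward; anything else disqualifies the dot. Schematically, simplified from the hunk and assuming the queue is seeded from the dot's uses:

    // Accept/reject walk over the dot's transitive uses (simplified).
    while (!queue.empty()) {
      auto [user, argIdx] = queue.pop_back_val();
      if (user->getParentOp() == forOp) {
        if (!isa<scf::YieldOp>(user))
          return std::nullopt;            // direct in-loop use: reject
        iterArgIdx = argIdx;              // carried through the loop
      } else if (auto ifOp = dyn_cast<scf::IfOp>(user->getParentOp())) {
        for (auto &use : ifOp.getResult(argIdx).getUses())
          queue.push_back({use.getOwner(), use.getOperandNumber()});
      } else {
        return std::nullopt;
      }
    }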
-+ int iterArgIdx = -1; -+ Value iterArg = nullptr; -+ SmallVector> queue; -+ for (auto &use : dotOp->getUses()) { -+ queue.push_back({use.getOwner(), use.getOperandNumber()}); - } -- -- // The result of the dot becomes this loop carry value. -- auto iterArgIdx = dotOp->getUses().begin()->getOperandNumber(); -- auto iterArg = forOp.getRegionIterArg(iterArgIdx); -+ while (!queue.empty()) { -+ auto [user, argIdx] = queue.pop_back_val(); -+ if (user->getParentOp() == forOp) { -+ if (isa(user)) { -+ if (iterArg) { -+ // The dot is used by the loop's yield, but we can't have any other -+ // uses. -+ return std::nullopt; -+ } -+ iterArgIdx = argIdx; -+ iterArg = forOp.getRegionIterArg(argIdx); -+ continue; -+ } -+ return std::nullopt; -+ } -+ if (auto ifOp = dyn_cast(user->getParentOp())) { -+ if (isa(user)) { -+ // The result is returned by the if, follow it further. -+ auto uses = ifOp.getResult(argIdx).getUses(); -+ for (auto &use : uses) { -+ queue.push_back({use.getOwner(), use.getOperandNumber()}); -+ } -+ } -+ } else { -+ return std::nullopt; -+ } -+ } - - // Rule 3a: Are the only users of the dot's result from iteration i-1 other - // MMAv3 dots? If so, we're done, this dot can be properly async. -@@ -1181,6 +1241,32 @@ static void insertAsyncDotWaitInLoop( - return; - } - -+ // Insert waits before the users of the properly async dots other than loop -+ // yield. -+ for (auto [asyncDot, iterArgIdx] : properlyAsyncDots) { -+ SmallVector uses; -+ for (auto &use : asyncDot->getUses()) { -+ if (auto yieldOp = dyn_cast(use.getOwner())) { -+ continue; -+ } -+ uses.push_back(&use); -+ } -+ -+ DenseMap> blockToUsers; -+ for (auto use : uses) { -+ auto block = use->getOwner()->getBlock(); -+ blockToUsers[block].push_back(use->get()); -+ } -+ -+ for (auto [block, users] : blockToUsers) { -+ OpBuilder builder(block, block->begin()); -+ auto newWait = builder.create(asyncDot->getLoc(), -+ ArrayRef{}, 0); -+ -+ threadValuesThroughWait(newWait, users); -+ } -+ } -+ - // Add the wait right after the last properly-async dot. This only needs to - // wait for all properly-async dots from the i-1'th iteration to complete, IOW - // we wait until there are most `asyncDots.size()` dots in flight. 
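And the synchronization half of that contract, from the hunk just above: every in-loop, non-yield consumer of a properly async dot gets a dot_wait with zero pendings prepended to its block, with the consumed values threaded through the wait. Condensed, with the template arguments (lost in this copy) restored:

    // One wait per consuming block: group used values by block, then
    // prepend a pendings=0 dot_wait and route the values through it.
    llvm::DenseMap<mlir::Block *, llvm::SmallVector<mlir::Value>> blockToUsers;
    for (mlir::OpOperand *use : uses)
      blockToUsers[use->getOwner()->getBlock()].push_back(use->get());
    for (auto &[block, vals] : blockToUsers) {
      mlir::OpBuilder builder(block, block->begin());
      auto newWait = builder.create<ttng::DotWaitOp>(
          asyncDot->getLoc(), llvm::ArrayRef<mlir::Value>{}, /*pendings=*/0);
      threadValuesThroughWait(newWait, vals);
    }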
-diff --git a/test/TritonGPU/loop-pipeline.mlir b/test/TritonGPU/loop-pipeline.mlir ---- a/test/TritonGPU/loop-pipeline.mlir -+++ b/test/TritonGPU/loop-pipeline.mlir -@@ -349,16 +349,21 @@ tt.func @indirect_bmm_scalar_dist_one(%7 - // CHECK: triton_gpu.async_copy_global_to_local - // CHECK: triton_gpu.async_copy_global_to_local - // CHECK: triton_gpu.async_commit_group -+// CHECK: triton_gpu.async_wait {{.*}} {num = 1 : i32} -+// CHECK: scf.for -+// CHECK: tt.dot - // CHECK: %[[NEXT_BUFFER_1:.*]] = tt.addptr %{{.*}}, {{.*}} - // CHECK: triton_gpu.async_copy_global_to_local %[[NEXT_BUFFER_1]] --// CHECK: %[[IND_BUFFER_0:.*]] = triton_gpu.memdesc_subview --// CHECK: %[[IND_BUFFER_1:.*]] = triton_gpu.local_load %[[IND_BUFFER_0]] -+// CHECK-DAG: %[[IND_BUFFER_WAIT_TOKEN:.*]] = triton_gpu.async_wait {{.*}} {num = 1 : i32} -+// CHECK-DAG: %[[IND_BUFFER_0:.*]] = triton_gpu.memdesc_subview -+// CHECK: %[[IND_BUFFER_1:.*]] = triton_gpu.local_load %[[IND_BUFFER_0]] token %[[IND_BUFFER_WAIT_TOKEN]] - // CHECK: %[[IND_BUFFER_2:.*]] = tt.expand_dims %[[IND_BUFFER_1]] {axis = 1 : i32} - // CHECK: %[[IND_BUFFER_3:.*]] = tt.broadcast %[[IND_BUFFER_2]] - // CHECK: %[[IND_BUFFER_4:.*]] = arith.muli {{.*}}, %[[IND_BUFFER_3]] - // CHECK: %[[NEXT_BUFFER_0:.*]] = tt.addptr {{.*}}, %[[IND_BUFFER_4]] - // CHECK: triton_gpu.async_copy_global_to_local %[[NEXT_BUFFER_0]] - // CHECK: triton_gpu.async_wait {{.*}} {num = 1 : i32} -+// CHECK: scf.yield - tt.func @indirect_bmm_vector(%77: tensor<16x16xi64, #BL> {tt.divisibility=16: i32, tt.constancy=16: i32}, - %76: index, - %49: tensor<16x16x!tt.ptr, #AL> {tt.divisibility=16: i32, tt.contiguity=2 : i32}, -diff --git a/test/TritonGPU/reorder-instructions.mlir b/test/TritonGPU/reorder-instructions.mlir ---- a/test/TritonGPU/reorder-instructions.mlir -+++ b/test/TritonGPU/reorder-instructions.mlir -@@ -28,7 +28,7 @@ module attributes {"triton_gpu.num-warps - // CHECK: triton_gpu.async_wait {num = 0 : i32} - // CHECK: triton_gpu.local_dealloc %0 : !tt.memdesc<4x128x64xf16, #shared> - // CHECK: triton_gpu.local_dealloc %1 : !tt.memdesc<4x128x64xf16, #shared> --// CHECK: %2 = triton_gpu.convert_layout %arg0 : tensor<32x32xf32, #blocked> -> tensor<32x32xf32, #blocked1> -+// CHECK: %3 = triton_gpu.convert_layout %arg0 : tensor<32x32xf32, #blocked> -> tensor<32x32xf32, #blocked1> - #blocked = #triton_gpu.blocked<{sizePerThread = [1, 1], threadsPerWarp = [32, 1], warpsPerCTA = [1, 4], order = [0, 1]}> - #blocked1 = #triton_gpu.blocked<{sizePerThread = [1, 1], threadsPerWarp = [32, 1], warpsPerCTA = [1, 4], order = [1, 0]}> - #shared = #triton_gpu.shared<{vec = 8, perPhase = 1, maxPhase = 4, order = [0, 1]}> -diff --git a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM/WGMMA.cpp b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM/WGMMA.cpp ---- a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM/WGMMA.cpp -+++ b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM/WGMMA.cpp -@@ -333,17 +333,6 @@ static Value faddAccumulate(ConversionPa - return newStruct; - } - --static bool isZero(Value v) { -- auto constantOp = v.getDefiningOp(); -- if (!constantOp) -- return false; -- if (auto denseAttr = dyn_cast(constantOp.getValueAttr())) -- return denseAttr.isSplat() && denseAttr.getSplatValue().isZero(); -- if (auto denseAttr = -- dyn_cast(constantOp.getValueAttr())) -- return denseAttr.isSplat() && denseAttr.getSplatValue().isZero(); -- return false; --} - - static SmallVector emitWait(ConversionPatternRewriter &rewriter, - Location loc, SmallVector acc, -@@ 
-402,7 +391,7 @@ LogicalResult convertDot(const LLVMTypeC - int M = 4 * instrShape[0]; - int N = instrShape[1]; - int K = instrShape[2]; -- bool zeroAcc = isZero(c); -+ bool zeroAcc = isZeroConst(c); - auto shapePerCTATile = getShapePerCTATile(mmaEncoding); - int numRepM = ceil(dShapePerCTA[0], shapePerCTATile[0]); - int numRepN = ceil(dShapePerCTA[1], shapePerCTATile[1]); -diff --git a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/LoadStoreOpToLLVM.cpp b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/LoadStoreOpToLLVM.cpp ---- a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/LoadStoreOpToLLVM.cpp -+++ b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/LoadStoreOpToLLVM.cpp -@@ -924,8 +924,11 @@ struct AsyncWaitOpConversion - auto voidTy = void_ty(ctx); - ptxBuilder.launch(rewriter, loc, voidTy); - -- // Safe to remove the op since it doesn't have any return value. -- rewriter.eraseOp(op); -+ // Drop the result token. -+ Value zero = rewriter.create( -+ op.getLoc(), IntegerType::get(op.getContext(), 32), -+ rewriter.getI32IntegerAttr(0)); -+ rewriter.replaceOp(op, zero); - return success(); - } - }; diff --git a/third_party/xla/third_party/triton/temporary/series.bzl b/third_party/xla/third_party/triton/temporary/series.bzl index 70313c9b436d2a..214666767949de 100644 --- a/third_party/xla/third_party/triton/temporary/series.bzl +++ b/third_party/xla/third_party/triton/temporary/series.bzl @@ -6,7 +6,5 @@ internal patch during the next triton integration process. """ temporary_patch_list = [ - "//third_party/triton/temporary:pipelining.patch", - "//third_party/triton/temporary:support_ceil_op.patch", "//third_party/triton/temporary:mma_limit_pred.patch", ] diff --git a/third_party/xla/third_party/triton/temporary/support_ceil_op.patch b/third_party/xla/third_party/triton/temporary/support_ceil_op.patch deleted file mode 100644 index 71b323d9fccdca..00000000000000 --- a/third_party/xla/third_party/triton/temporary/support_ceil_op.patch +++ /dev/null @@ -1,138 +0,0 @@ -Cherry-picking https://github.com/openai/triton/commit/62706e8c518c8c56e56460a43732d8e375217860 -until the next integration lands it. Can be removed as it is already merged. 
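The merged commit threads ceil through every layer the hunks below touch: a Python-level tl.ceil builtin, a create_ceil builder binding emitting math::CeilOp, a one-line POPULATE_UNARY_OP mapping for the LLVM lowering, and an np.ceil fallback in the interpreter. The binding, with the template argument (lost in this copy) restored by inference:

    // python/src/ir.cc (sketch of the quoted hunk): expose math::CeilOp to
    // the Python builder as create_ceil.
    .def("create_ceil",
         [](TritonOpBuilder &self, mlir::Value &val) -> mlir::Value {
           return self.create<mlir::math::CeilOp>(val);
         })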
- -diff --git a/lib/Conversion/TritonGPUToLLVM/ElementwiseOpToLLVM.cpp b/lib/Conversion/TritonGPUToLLVM/ElementwiseOpToLLVM.cpp ---- a/lib/Conversion/TritonGPUToLLVM/ElementwiseOpToLLVM.cpp -+++ b/lib/Conversion/TritonGPUToLLVM/ElementwiseOpToLLVM.cpp -@@ -805,6 +805,7 @@ void mlir::triton::populateElementwiseOp - POPULATE_UNARY_OP(arith::FPToUIOp, LLVM::FPToUIOp) - POPULATE_UNARY_OP(arith::UIToFPOp, LLVM::UIToFPOp) - POPULATE_UNARY_OP(math::FloorOp, math::FloorOp) -+ POPULATE_UNARY_OP(math::CeilOp, math::CeilOp) - POPULATE_UNARY_OP(math::LogOp, math::LogOp) - POPULATE_UNARY_OP(math::Log2Op, math::Log2Op) - POPULATE_UNARY_OP(math::CosOp, math::CosOp) -diff --git a/lib/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.cpp b/lib/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.cpp ---- a/lib/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.cpp -+++ b/lib/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.cpp -@@ -125,12 +125,13 @@ void populateMathPatternsAndLegality(Tri - MLIRContext *context = patterns.getContext(); - // Rewrite rule - patterns.add, GenericOpPattern, -- GenericOpPattern, GenericOpPattern, -- GenericOpPattern, GenericOpPattern, -- GenericOpPattern, GenericOpPattern, -- GenericOpPattern, GenericOpPattern, -- GenericOpPattern, GenericOpPattern, -- GenericOpPattern>(typeConverter, context); -+ GenericOpPattern, GenericOpPattern, -+ GenericOpPattern, GenericOpPattern, -+ GenericOpPattern, GenericOpPattern, -+ GenericOpPattern, GenericOpPattern, -+ GenericOpPattern, GenericOpPattern, -+ GenericOpPattern, GenericOpPattern>( -+ typeConverter, context); - } - - // -diff --git a/lib/Dialect/TritonNvidiaGPU/Transforms/PlanCTA.cpp b/lib/Dialect/TritonNvidiaGPU/Transforms/PlanCTA.cpp ---- a/lib/Dialect/TritonNvidiaGPU/Transforms/PlanCTA.cpp -+++ b/lib/Dialect/TritonNvidiaGPU/Transforms/PlanCTA.cpp -@@ -651,10 +651,9 @@ bool CTAPlanner::isElementwiseOp(Operati - math::CeilOp, math::CopySignOp, math::CosOp, math::SinOp, - math::CountLeadingZerosOp, math::CountTrailingZerosOp, - math::CtPopOp, math::ErfOp, math::ExpOp, math::Exp2Op, -- math::FloorOp, math::ExpM1Op, math::FloorOp, math::FmaOp, -- math::LogOp, math::Log10Op, math::Log1pOp, math::Log2Op, -- math::PowFOp, math::RsqrtOp, math::SqrtOp, math::RsqrtOp, -- math::TanhOp>(op)) -+ math::FloorOp, math::ExpM1Op, math::FmaOp, math::LogOp, -+ math::Log10Op, math::Log1pOp, math::Log2Op, math::PowFOp, -+ math::RsqrtOp, math::SqrtOp, math::RsqrtOp, math::TanhOp>(op)) - return true; - if (llvm::isa Value { - return self.create(val); - }) -+ .def("create_ceil", -+ [](TritonOpBuilder &self, Value &val) -> Value { -+ return self.create(val); -+ }) - .def("create_exp", - [](TritonOpBuilder &self, Value &val) -> Value { - return self.create(val); -diff --git a/python/test/unit/language/test_core.py b/python/test/unit/language/test_core.py ---- a/python/test/unit/language/test_core.py -+++ b/python/test/unit/language/test_core.py -@@ -915,10 +915,11 @@ def test_unary_op(dtype_x, expr, num_cta - - - @pytest.mark.interpreter --@pytest.mark.parametrize("dtype_x, expr, x", [(dtype_x, expr, x) -- for dtype_x in ["float32", "float64"] -- for expr in ['exp', 'log', 'cos', 'sin', 'exp2', 'log2', 'sqrt', 'floor'] -- for x in ['x', '3.0']]) -+@pytest.mark.parametrize("dtype_x, expr, x", -+ [(dtype_x, expr, x) -+ for dtype_x in ["float32", "float64"] -+ for expr in ['exp', 'log', 'cos', 'sin', 'exp2', 'log2', 'sqrt', 'floor', 'ceil'] -+ for x in ['x', '3.0']]) - def test_math_op(dtype_x, expr, x, device): - _test_unary(dtype_x, f'tl.{expr}({x})', 
f'np.{expr}({x}) ', device=device) - -diff --git a/python/triton/language/__init__.py b/python/triton/language/__init__.py ---- a/python/triton/language/__init__.py -+++ b/python/triton/language/__init__.py -@@ -102,7 +102,8 @@ from .core import ( - void, - where, - ) --from .math import (umulhi, exp, exp2, fma, log, log2, cos, rsqrt, sin, sqrt, sqrt_rn, abs, fdiv, div_rn, erf, floor) -+from .math import (umulhi, exp, exp2, fma, log, log2, cos, rsqrt, sin, sqrt, sqrt_rn, abs, fdiv, div_rn, erf, floor, -+ ceil) - from .random import ( - pair_uniform_to_normal, - philox, -@@ -142,6 +143,7 @@ from .random import ( - "builtin", - "cat", - "cdiv", -+ "ceil", - "clamp", - "const", - "const_pointer_type", -diff --git a/python/triton/language/math.py b/python/triton/language/math.py ---- a/python/triton/language/math.py -+++ b/python/triton/language/math.py -@@ -230,6 +230,15 @@ def floor(x, _builder=None): - - - @core.builtin -+@_check_dtype(dtypes=["fp32", "fp64"]) -+@_add_math_1arg_docstr("ceil") -+@core._tensor_member_fn -+def ceil(x, _builder=None): -+ x = core._to_tensor(x, _builder) -+ return core.tensor(_builder.create_ceil(x.handle), x.type) -+ -+ -+@core.builtin - @_add_math_3arg_docstr("fused multiply-add") - def fma(x, y, z, _builder=None): - x = core._to_tensor(x, _builder) -diff --git a/python/triton/runtime/interpreter.py b/python/triton/runtime/interpreter.py ---- a/python/triton/runtime/interpreter.py -+++ b/python/triton/runtime/interpreter.py -@@ -391,6 +391,7 @@ class InterpreterBuilder: - create_fabs = lambda self, arg: self.unary_op(arg, np.abs) - create_iabs = lambda self, arg: self.unary_op(arg, np.abs) - create_floor = lambda self, arg: self.unary_op(arg, np.floor) -+ create_ceil = lambda self, arg: self.unary_op(arg, np.ceil) - create_log = lambda self, arg: self.unary_op(arg, np.log) - create_log2 = lambda self, arg: self.unary_op(arg, np.log2) - create_precise_sqrt = lambda self, arg: self.unary_op(arg, np.sqrt) diff --git a/third_party/xla/third_party/triton/workspace.bzl b/third_party/xla/third_party/triton/workspace.bzl index 45daf7974a022e..933b815253ffcd 100644 --- a/third_party/xla/third_party/triton/workspace.bzl +++ b/third_party/xla/third_party/triton/workspace.bzl @@ -8,8 +8,8 @@ load("//third_party/triton/xla_extensions:series.bzl", "extensions_files_patch_l def repo(): """Imports Triton.""" - TRITON_COMMIT = "cl623533461" - TRITON_SHA256 = "7aa74e82e4417a91fc7a7a84b4f6ad2b7e4e58512758d6c78ca3cd1c8771326b" + TRITON_COMMIT = "cl632952902" + TRITON_SHA256 = "f83c4f197cc2ae3b034070ec8189007451093edc445d1e383eb17a1e3808de9a" tf_http_archive( name = "triton", sha256 = TRITON_SHA256, diff --git a/third_party/xla/third_party/triton/xla_extensions/env_vars.patch b/third_party/xla/third_party/triton/xla_extensions/env_vars.patch deleted file mode 100644 index 955eb6db8da68e..00000000000000 --- a/third_party/xla/third_party/triton/xla_extensions/env_vars.patch +++ /dev/null @@ -1,14 +0,0 @@ -Long standing patch due to licensing issues. 
-diff --git a/include/triton/Tools/Sys/GetEnv.hpp b/include/triton/Tools/Sys/GetEnv.hpp -index 31bc03fe1..a19a432df 100644 ---- a/include/triton/Tools/Sys/GetEnv.hpp -+++ b/include/triton/Tools/Sys/GetEnv.hpp -@@ -34,7 +34,7 @@ inline const std::set ENV_VARS = { - "AMDGCN_ENABLE_DUMP", - "DISABLE_FAST_REDUCTION", - "DISABLE_LLVM_OPT", -- "DISABLE_MMA_V3", -+ "ENABLE_MMA_V3", - "DISABLE_PTXAS_OPT", - "LLVM_IR_ENABLE_DUMP", - "MLIR_ENABLE_DUMP", diff --git a/third_party/xla/third_party/triton/xla_extensions/series.bzl b/third_party/xla/third_party/triton/xla_extensions/series.bzl index b858da203fb094..af524fb253cbef 100644 --- a/third_party/xla/third_party/triton/xla_extensions/series.bzl +++ b/third_party/xla/third_party/triton/xla_extensions/series.bzl @@ -4,7 +4,6 @@ applied in the previous copybara workflow. """ extensions_files_patch_list = [ - "//third_party/triton/xla_extensions:env_vars.patch", # File not exported to google "//third_party/triton/xla_extensions:sparse_dot_nvgpu.patch", # Sparsity internal patch "//third_party/triton/xla_extensions:sparse_dot_base.patch", # Sparsity internal patch "//third_party/triton/xla_extensions:sparse_dot_passes.patch", # Sparsity internal patch diff --git a/third_party/xla/third_party/triton/xla_extensions/sparse_dot_base.patch b/third_party/xla/third_party/triton/xla_extensions/sparse_dot_base.patch index dcacd99740b18f..08b7dd6f7ada87 100644 --- a/third_party/xla/third_party/triton/xla_extensions/sparse_dot_base.patch +++ b/third_party/xla/third_party/triton/xla_extensions/sparse_dot_base.patch @@ -1,8 +1,9 @@ diff --git a/include/triton/Dialect/TritonGPU/IR/TritonGPUAttrDefs.td b/include/triton/Dialect/TritonGPU/IR/TritonGPUAttrDefs.td +index 56f0b6b49..aa91ea9b8 100644 --- a/include/triton/Dialect/TritonGPU/IR/TritonGPUAttrDefs.td +++ b/include/triton/Dialect/TritonGPU/IR/TritonGPUAttrDefs.td -@@ -1158,4 +1158,12 @@ section 9.7.13.4.1 for more details. - let extraClassDeclaration = extraDistributedDeclaration; +@@ -1262,4 +1262,16 @@ section 9.7.13.4.1 for more details. 
+ }]; } +def SparseDotMetaEncodingAttr : DistributedEncoding<"SparseDotMetaEncoding", "sparse_dot_meta_encoding"> { @@ -10,14 +11,19 @@ diff --git a/include/triton/Dialect/TritonGPU/IR/TritonGPUAttrDefs.td b/include/ + + let parameters = (ins "Attribute":$parent); + let assemblyFormat = "`<``{` struct(params) `}``>`"; -+ let extraClassDeclaration = extraDistributedDeclaration; ++ let extraClassDeclaration = extraDistributedDeclaration # [{ ++ SmallVector getContigPerThread() { ++ return getSizePerThread(); ++ }; ++ }]; +} + #endif diff --git a/include/triton/Dialect/TritonGPU/IR/TritonGPUOps.td b/include/triton/Dialect/TritonGPU/IR/TritonGPUOps.td +index 4966a5f73..d2bb33cfa 100644 --- a/include/triton/Dialect/TritonGPU/IR/TritonGPUOps.td +++ b/include/triton/Dialect/TritonGPU/IR/TritonGPUOps.td -@@ -7,6 +7,7 @@ include "triton/Dialect/TritonGPU/IR/Tri +@@ -7,6 +7,7 @@ include "triton/Dialect/TritonGPU/IR/TritonGPUAttrDefs.td" include "mlir/Dialect/Arith/IR/ArithBase.td" include "triton/Dialect/Triton/IR/TritonTypes.td" include "triton/Dialect/Triton/IR/TritonAttrDefs.td" @@ -25,8 +31,8 @@ diff --git a/include/triton/Dialect/TritonGPU/IR/TritonGPUOps.td b/include/trito include "mlir/IR/OpBase.td" include "mlir/Interfaces/SideEffectInterfaces.td" // Pure include "mlir/Interfaces/InferTypeOpInterface.td" // SameOperandsAndResultType -@@ -214,4 +215,19 @@ def TTG_LocalLoadOp : TTG_Op<"local_load - let results = (outs TT_Tensor:$result); +@@ -232,4 +233,19 @@ def TTG_LocalStoreOp : TTG_Op<"local_store", [MemoryEffects<[MemWrite shape, return encoding; } @@ -70,7 +77,7 @@ diff --git a/lib/Dialect/TritonGPU/IR/Dialect.cpp b/lib/Dialect/TritonGPU/IR/Dia + +LogicalResult SparseDotOp::verify() { + // Verify operand A. -+ auto aTensorTy = getOperand(0).getType().cast(); ++ auto aTensorTy = cast(getOperand(0).getType()); + auto aElemTy = aTensorTy.getElementType(); + if (!aElemTy.isF16() && !aElemTy.isBF16()) + return emitError("element type of operand A is not supported"); @@ -78,7 +85,7 @@ diff --git a/lib/Dialect/TritonGPU/IR/Dialect.cpp b/lib/Dialect/TritonGPU/IR/Dia + if (aShape.size() != 2) return emitError("shape of operand A is incorrect"); + + // Verify operand B. -+ auto bTensorTy = getOperand(1).getType().cast(); ++ auto bTensorTy = cast(getOperand(1).getType()); + auto bElemTy = bTensorTy.getElementType(); + if (!bElemTy.isF16() && !bElemTy.isBF16()) + return emitError("element type of operand B is not supported"); @@ -86,7 +93,7 @@ diff --git a/lib/Dialect/TritonGPU/IR/Dialect.cpp b/lib/Dialect/TritonGPU/IR/Dia + if (bShape.size() != 2) return emitError("shape of operand B is incorrect"); + + // Verify operand C. -+ auto cTensorTy = getOperand(2).getType().cast(); ++ auto cTensorTy = cast(getOperand(2).getType()); + auto cElemTy = cTensorTy.getElementType(); + if (!cElemTy.isF32()) + return emitError("element type of operand C is not supported"); @@ -101,7 +108,7 @@ diff --git a/lib/Dialect/TritonGPU/IR/Dialect.cpp b/lib/Dialect/TritonGPU/IR/Dia + return emitError("operand element types do not match"); + + // Verify sparse metadata. 
-+ auto metaTy = getOperand(3).getType().cast(); ++ auto metaTy = cast(getOperand(3).getType()); + auto metaShape = metaTy.getShape(); + if (!metaTy.getElementType().isInteger(16) || metaShape.size() != 2) + return emitError("sparse metadata tensor is invalid"); @@ -125,7 +132,7 @@ diff --git a/lib/Dialect/TritonGPU/IR/Dialect.cpp b/lib/Dialect/TritonGPU/IR/Dia +//--- SparseDotMetaEncodingAttr --- +unsigned SparseDotMetaEncodingAttr::getTotalElemsPerThread( + ArrayRef shape, Type eltTy) const { -+ auto mmaLayout = getParent().cast(); ++ auto mmaLayout = mlir::cast(getParent()); + return product(shape) / + (mmaLayout.getWarpsPerCTA()[0] * kMetadataElementsPerWarp); +} @@ -169,9 +176,10 @@ diff --git a/lib/Dialect/TritonGPU/IR/Dialect.cpp b/lib/Dialect/TritonGPU/IR/Dia } // namespace triton } // namespace mlir diff --git a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM.cpp b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM.cpp +index f8ece0f1c..435610817 100644 --- a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM.cpp +++ b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM.cpp -@@ -38,6 +38,14 @@ Value convertLayout(int opIdx, Conversio +@@ -43,6 +43,14 @@ Value convertLayout(int opIdx, ConversionPatternRewriter &rewriter, const LLVMTypeConverter *typeConverter, Value thread); } @@ -185,19 +193,19 @@ diff --git a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM. + namespace { - struct LocalLoadOpConversion -@@ -59,6 +67,10 @@ public: - .isa()) { + using namespace mlir; +@@ -67,6 +75,10 @@ public: + cast(dstLayout).getParent())) { return lowerSharedToDotOperand(op, adaptor, getTypeConverter(), rewriter); } -+ if (srcLayout.isa() && -+ dstLayout.isa()) { ++ if (isa(srcLayout) && ++ isa(dstLayout)) { + return lowerSharedToSparseMeta(op, adaptor, getTypeConverter(), rewriter); + } return failure(); } -@@ -130,6 +142,29 @@ private: +@@ -138,6 +150,26 @@ private: rewriter.replaceOp(op, res); return success(); } @@ -208,13 +216,10 @@ diff --git a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM. + const LLVMTypeConverter *typeConverter, + ConversionPatternRewriter &rewriter) const { + auto loc = op.getLoc(); -+ auto sparseEncoding = op.getResult() -+ .getType() -+ .cast() -+ .getEncoding() -+ .cast(); ++ auto sparseEncoding = cast( ++ cast(op.getResult().getType()).getEncoding()); + auto llvmElemTy = typeConverter->convertType( -+ op.getSrc().getType().cast().getElementType()); ++ cast(op.getSrc().getType()).getElementType()); + auto smemObj = getSharedMemoryObjectFromStruct(loc, adaptor.getSrc(), + llvmElemTy, rewriter); + Value res = SharedToSparseDotOperand::convertLayout( @@ -229,6 +234,7 @@ diff --git a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM. struct ConvertLayoutOpOptimizedConversion diff --git a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM/SharedToSparseDotOperand.cpp b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM/SharedToSparseDotOperand.cpp new file mode 100644 +index 000000000..3011cf73d --- /dev/null +++ b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM/SharedToSparseDotOperand.cpp @@ -0,0 +1,69 @@ @@ -255,7 +261,7 @@ new file mode 100644 + Value thread) { + // Calculate tile size as number of mask elements (4xi4). 
+ NvidiaMmaEncodingAttr mmaLayout = -+ sparseEncoding.getParent().cast(); ++ cast(sparseEncoding.getParent()); + SmallVector shapePerCTATile = { + kTileSize * mmaLayout.getWarpsPerCTA()[0], + kTileSize / kMetadataElementsPerPackedValue}; @@ -272,7 +278,7 @@ new file mode 100644 + Value rowId = add(mul(warpGroupId, i32_val(kTileSize)), laneGroupId); + + // Calculate number of tile repetitions. -+ auto shape = tensor.getType().cast().getShape(); ++ auto shape = cast(tensor.getType()).getShape(); + int repM = shape[0] / shapePerCTATile[0]; + int repK = shape[1] / shapePerCTATile[1]; + assert(repM > 0 && repK > 0); @@ -302,9 +308,10 @@ new file mode 100644 +} +} // namespace SharedToSparseDotOperand diff --git a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM.cpp b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM.cpp +index 374b9ec9e..1601806b4 100644 --- a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM.cpp +++ b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM.cpp -@@ -32,6 +32,12 @@ LogicalResult convertAsyncWGMMA(triton:: +@@ -32,6 +32,12 @@ LogicalResult convertAsyncWGMMA(triton::nvidia_gpu::DotAsyncOp op, const LLVMTypeConverter *typeConverter, ConversionPatternRewriter &rewriter, Value thread); @@ -317,7 +324,7 @@ diff --git a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM.cpp b/thir namespace { struct DotOpConversion : public ConvertOpToLLVMPattern { using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; -@@ -180,6 +186,18 @@ struct DotWaitOpConversion +@@ -174,6 +180,18 @@ struct DotWaitOpConversion return success(); } }; @@ -336,7 +343,7 @@ diff --git a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM.cpp b/thir } // namespace void mlir::triton::NVIDIA::populateDotOpToLLVMPatterns( -@@ -188,4 +206,5 @@ void mlir::triton::NVIDIA::populateDotOp +@@ -182,4 +200,5 @@ void mlir::triton::NVIDIA::populateDotOpToLLVMPatterns( patterns.add(typeConverter, benefit); patterns.add(typeConverter, benefit); patterns.add(typeConverter, benefit); @@ -344,6 +351,7 @@ diff --git a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM.cpp b/thir } diff --git a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM/Sparse.cpp b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM/Sparse.cpp new file mode 100644 +index 000000000..34d9212d2 --- /dev/null +++ b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM/Sparse.cpp @@ -0,0 +1,339 @@ @@ -397,15 +405,15 @@ new file mode 100644 + const LLVMTypeConverter *typeConverter, + ConversionPatternRewriter &rewriter) { + // Get number of repetitions across the dimensions. -+ auto aTensorTy = op.getA().getType().cast(); -+ auto bTensorTy = op.getB().getType().cast(); ++ auto aTensorTy = cast(op.getA().getType()); ++ auto bTensorTy = cast(op.getB().getType()); + -+ auto layoutA = aTensorTy.getEncoding().dyn_cast(); -+ auto layoutB = bTensorTy.getEncoding().dyn_cast(); ++ auto layoutA = dyn_cast(aTensorTy.getEncoding()); ++ auto layoutB = dyn_cast(bTensorTy.getEncoding()); + assert(layoutA != nullptr && layoutB != nullptr); + + int bitwidth = aTensorTy.getElementType().getIntOrFloatBitWidth(); -+ auto mmaEnc = layoutA.getParent().cast(); ++ auto mmaEnc = cast(layoutA.getParent()); + auto repA = mmaEnc.getMMAv2Rep(triton::gpu::getShapePerCTA(aTensorTy), + bitwidth, layoutA.getOpIdx()); + auto repB = mmaEnc.getMMAv2Rep(triton::gpu::getShapePerCTA(bTensorTy), @@ -435,7 +443,7 @@ new file mode 100644 + } + + // Flatten accumulator values. 
-+ auto dTensorTy = op.getD().getType().cast(); ++ auto dTensorTy = cast(op.getD().getType()); + auto fc = unpackLLElements(loc, adaptor.getC(), rewriter); + + // Create `mma.sp` instruction for 4/8 core matrices. @@ -512,7 +520,7 @@ new file mode 100644 + Location loc, std::vector instrShape, + bool trans, int dimWpt, Value warpId, MemDescType tensorTy, + Value baseDesc, int minor) { -+ auto sharedLayout = tensorTy.getEncoding().cast(); ++ auto sharedLayout = cast(tensorTy.getEncoding()); + int elemBytes = tensorTy.getElementTypeBitWidth() / 8; + int elemsPerSwizzlingRow = + kMmaLineSize / sharedLayout.getPerPhase() / elemBytes; @@ -541,10 +549,10 @@ new file mode 100644 + ConversionPatternRewriter &rewriter, + Value thread) { + // Get number of repetitions across the dimensions. -+ auto aTensorTy = op.getA().getType().cast(); -+ auto bTensorTy = op.getB().getType().cast(); -+ auto dTensorTy = op.getD().getType().cast(); -+ auto mmaEnc = dTensorTy.getEncoding().cast(); ++ auto aTensorTy = cast(op.getA().getType()); ++ auto bTensorTy = cast(op.getB().getType()); ++ auto dTensorTy = cast(op.getD().getType()); ++ auto mmaEnc = cast(dTensorTy.getEncoding()); + + auto shapePerCTA = getShapePerCTA(dTensorTy); + auto shapePerCTATile = getShapePerCTATile(mmaEnc); @@ -573,7 +581,7 @@ new file mode 100644 + auto sharedObj = getSharedMemoryObjectFromStruct( + loc, arg, typeConverter->convertType(tensorTy.getElementType()), + rewriter); -+ auto sharedLayout = tensorTy.getEncoding().cast(); ++ auto sharedLayout = cast(tensorTy.getEncoding()); + auto shape = getShapePerCTA(tensorTy); + auto ord = sharedLayout.getOrder(); + int byteSize = aTensorTy.getElementTypeBitWidth() / 8; @@ -671,9 +679,9 @@ new file mode 100644 + SparseDotOp::Adaptor adaptor, + const LLVMTypeConverter *typeConverter, + ConversionPatternRewriter &rewriter) { -+ auto resultTy = op.getResult().getType().cast(); ++ auto resultTy = cast(op.getResult().getType()); + NvidiaMmaEncodingAttr mmaLayout = -+ resultTy.getEncoding().cast(); ++ cast(resultTy.getEncoding()); + + if (mmaLayout.isAmpere()) { + return convertSparseMMA(op, adaptor, typeConverter, rewriter); @@ -687,9 +695,10 @@ new file mode 100644 + "Unsupported SparseDotOp found when converting TritonGPU to LLVM."); +} diff --git a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM/WGMMA.cpp b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM/WGMMA.cpp +index 738f0fe04..867939f65 100644 --- a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM/WGMMA.cpp +++ b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM/WGMMA.cpp -@@ -87,8 +87,8 @@ int64_t getSwizzlingFromLayout(const Sha +@@ -88,8 +88,8 @@ int64_t getSwizzlingFromLayout(const SharedEncodingAttr &layout, return swizzlingByteWidth; } diff --git a/third_party/xla/third_party/triton/xla_extensions/sparse_dot_fixes_y24w17.patch b/third_party/xla/third_party/triton/xla_extensions/sparse_dot_fixes_y24w17.patch index 9d1ae2e91cae3f..ce009aa688e9bf 100644 --- a/third_party/xla/third_party/triton/xla_extensions/sparse_dot_fixes_y24w17.patch +++ b/third_party/xla/third_party/triton/xla_extensions/sparse_dot_fixes_y24w17.patch @@ -1,30 +1,8 @@ -diff --git a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM/SharedToSparseDotOperand.cpp b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM/SharedToSparseDotOperand.cpp ---- a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM/SharedToSparseDotOperand.cpp -+++ 
b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM/SharedToSparseDotOperand.cpp -@@ -22,16 +22,16 @@ Value convertLayout( - // Calculate tile size as number of mask elements (4xi4). - NvidiaMmaEncodingAttr mmaLayout = - sparseEncoding.getParent().cast(); -+ SmallVector warpsPerCTA = mmaLayout.getWarpsPerCTA(); - SmallVector shapePerCTATile = { -- kTileSize * mmaLayout.getWarpsPerCTA()[0], -- kTileSize / kMetadataElementsPerPackedValue}; -+ kTileSize * warpsPerCTA[0], kTileSize / kMetadataElementsPerPackedValue}; - Value strideM = smemObj.strides[0]; - Value strideK = smemObj.strides[1]; - - // Calculate offset in the tile for the current thread. - Value threadsPerWarp = i32_val(kThreadsPerWarp); - Value warpId = udiv(thread, threadsPerWarp); -- Value warpGroupId = urem(warpId, i32_val(shapePerCTATile[0] / kTileSize)); -+ Value warpGroupId = udiv(warpId, i32_val(warpsPerCTA[1])); - Value laneId = urem(thread, threadsPerWarp); - Value laneGroupId = udiv(laneId, i32_val(kThreadsInGroup)); - Value columnId = urem(laneId, i32_val(shapePerCTATile[1])); diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp +index 0516fc56f..1f27f8a43 100644 --- a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp +++ b/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp -@@ -139,6 +139,7 @@ class BlockedToMMA : public mlir::Rewrit +@@ -142,6 +142,7 @@ class BlockedToMMA : public mlir::RewritePattern { mlir::TypeID::get()); } @@ -32,7 +10,7 @@ diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect // Finds the first different bitwidth in the chain of shape-preserving // unary ops that x depends on. // There are two primary scenarios: -@@ -172,7 +173,6 @@ class BlockedToMMA : public mlir::Rewrit +@@ -175,7 +176,6 @@ class BlockedToMMA : public mlir::RewritePattern { return origBitWidth; } @@ -40,7 +18,7 @@ diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect BlockedToMMA(mlir::MLIRContext *context, int computeCapability) : mlir::RewritePattern(tt::DotOp::getOperationName(), 2, context), computeCapability(computeCapability) {} -@@ -388,18 +388,22 @@ class SparseBlockedToMMA : public mlir:: +@@ -389,18 +389,22 @@ class SparseBlockedToMMA : public mlir::RewritePattern { newRetType, oldAcc); if (versionMajor == 2) { @@ -49,7 +27,7 @@ diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect + int kWidth = 32 / minBitwidth; + // convert A operand - auto oldAType = a.getType().cast(); + auto oldAType = cast(a.getType()); - auto newAEncoding = ttg::DotOperandEncodingAttr::get( - ctx, 0, mmaEnc, oldAType.getElementType()); + auto newAEncoding = @@ -59,7 +37,7 @@ diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect a = rewriter.create(a.getLoc(), newAType, a); // convert B operand - auto oldBType = b.getType().cast(); + auto oldBType = cast(b.getType()); - auto newBEncoding = ttg::DotOperandEncodingAttr::get( - ctx, 1, mmaEnc, oldBType.getElementType()); + auto newBEncoding = @@ -67,3 +45,27 @@ diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect auto newBType = RankedTensorType::get( oldBType.getShape(), oldBType.getElementType(), newBEncoding); b = rewriter.create(b.getLoc(), newBType, b); +diff --git a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM/SharedToSparseDotOperand.cpp b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM/SharedToSparseDotOperand.cpp +index 
3011cf73d..ea587dced 100644 +--- a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM/SharedToSparseDotOperand.cpp ++++ b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM/SharedToSparseDotOperand.cpp +@@ -22,16 +22,16 @@ Value convertLayout( + // Calculate tile size as number of mask elements (4xi4). + NvidiaMmaEncodingAttr mmaLayout = + cast(sparseEncoding.getParent()); ++ SmallVector warpsPerCTA = mmaLayout.getWarpsPerCTA(); + SmallVector shapePerCTATile = { +- kTileSize * mmaLayout.getWarpsPerCTA()[0], +- kTileSize / kMetadataElementsPerPackedValue}; ++ kTileSize * warpsPerCTA[0], kTileSize / kMetadataElementsPerPackedValue}; + Value strideM = smemObj.strides[0]; + Value strideK = smemObj.strides[1]; + + // Calculate offset in the tile for the current thread. + Value threadsPerWarp = i32_val(kThreadsPerWarp); + Value warpId = udiv(thread, threadsPerWarp); +- Value warpGroupId = urem(warpId, i32_val(shapePerCTATile[0] / kTileSize)); ++ Value warpGroupId = udiv(warpId, i32_val(warpsPerCTA[1])); + Value laneId = urem(thread, threadsPerWarp); + Value laneGroupId = udiv(laneId, i32_val(kThreadsInGroup)); + Value columnId = urem(laneId, i32_val(shapePerCTATile[1])); diff --git a/third_party/xla/third_party/triton/xla_extensions/sparse_dot_nvgpu.patch b/third_party/xla/third_party/triton/xla_extensions/sparse_dot_nvgpu.patch index ced13cff33fd16..791618363b2f34 100644 --- a/third_party/xla/third_party/triton/xla_extensions/sparse_dot_nvgpu.patch +++ b/third_party/xla/third_party/triton/xla_extensions/sparse_dot_nvgpu.patch @@ -1,7 +1,8 @@ -diff --git a/include/triton/Dialect/NVGPU/IR/NVGPUOps.td b/include/triton/Dialect/NVGPU/IR/NVGPUOps.td ---- a/include/triton/Dialect/NVGPU/IR/NVGPUOps.td -+++ b/include/triton/Dialect/NVGPU/IR/NVGPUOps.td -@@ -87,6 +87,15 @@ def NVGPU_WGMMAOp : NVGPU_Op<"wgmma", [] +diff --git a/third_party/nvidia/include/Dialect/NVGPU/IR/NVGPUOps.td b/third_party/nvidia/include/Dialect/NVGPU/IR/NVGPUOps.td +index ca9d18873..d39bc6ec4 100644 +--- a/third_party/nvidia/include/Dialect/NVGPU/IR/NVGPUOps.td ++++ b/third_party/nvidia/include/Dialect/NVGPU/IR/NVGPUOps.td +@@ -87,6 +87,15 @@ def NVGPU_WGMMAOp : NVGPU_Op<"wgmma", []> { let assemblyFormat = "$opA `,` $opB (`,` $opC^)? attr-dict `:` functional-type(operands, $res)"; } @@ -18,9 +19,10 @@ diff --git a/include/triton/Dialect/NVGPU/IR/NVGPUOps.td b/include/triton/Dialec let arguments = (ins LLVM_AnyPointer:$addr, I32:$ctaId, I32Attr:$bitwidth, I32Attr:$vec); let builders = [ diff --git a/third_party/nvidia/lib/NVGPUToLLVM/NVGPUToLLVMPass.cpp b/third_party/nvidia/lib/NVGPUToLLVM/NVGPUToLLVMPass.cpp +index e19216520..aacbfb569 100644 --- a/third_party/nvidia/lib/NVGPUToLLVM/NVGPUToLLVMPass.cpp +++ b/third_party/nvidia/lib/NVGPUToLLVM/NVGPUToLLVMPass.cpp -@@ -688,6 +688,84 @@ public: +@@ -668,6 +668,84 @@ public: } }; @@ -31,7 +33,7 @@ diff --git a/third_party/nvidia/lib/NVGPUToLLVM/NVGPUToLLVMPass.cpp b/third_part + using Base::Base; + + std::vector getOutputConstraints(ttn::SparseWGMMAOp op) const { -+ auto outputStructType = op.getType().cast(); ++ auto outputStructType = cast(op.getType()); + uint32_t numOutputRegs = outputStructType.getBody().size(); + std::string output = + outputStructType.getBody().front().isF32() ? 
"=f" : "=r"; @@ -71,7 +73,7 @@ diff --git a/third_party/nvidia/lib/NVGPUToLLVM/NVGPUToLLVMPass.cpp b/third_part + + // Output and operand C + uint32_t numCRegs = -+ op.getType().cast().getBody().size(); ++ cast(op.getType()).getBody().size(); + args += "{"; + for (uint32_t i = 0; i < numCRegs; ++i) { + args += "$" + std::to_string(asmOpIdx++) + (i == numCRegs - 1 ? "" : ","); @@ -105,13 +107,17 @@ diff --git a/third_party/nvidia/lib/NVGPUToLLVM/NVGPUToLLVMPass.cpp b/third_part class ConvertNVGPUToLLVM : public ConvertNVGPUToLLVMBase { public: -@@ -711,7 +789,8 @@ public: +@@ -688,10 +766,9 @@ public: + patterns.add>( + context, Cluster_Cta_Id_Op, Constraints({"=r"}), Constraints()); - patterns.add(context); -+ WGMMAWaitGroupOpPattern, StoreDSmemOpPattern, -+ SparseWGMMAOpPattern>(context); +- patterns +- .add( +- context); ++ patterns.add(context); if (applyPatternsAndFoldGreedily(mod, std::move(patterns)).failed()) signalPassFailure(); diff --git a/third_party/xla/third_party/triton/xla_extensions/sparse_dot_passes.patch b/third_party/xla/third_party/triton/xla_extensions/sparse_dot_passes.patch index 74662cf3c90dc9..9136cb84b24254 100644 --- a/third_party/xla/third_party/triton/xla_extensions/sparse_dot_passes.patch +++ b/third_party/xla/third_party/triton/xla_extensions/sparse_dot_passes.patch @@ -1,7 +1,8 @@ diff --git a/lib/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.cpp b/lib/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.cpp +index 4aa2712ec..16a6253d7 100644 --- a/lib/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.cpp +++ b/lib/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.cpp -@@ -277,6 +277,89 @@ struct TritonDotPattern : public OpConve +@@ -279,6 +279,89 @@ struct TritonDotPattern : public OpConversionPattern { } }; @@ -12,7 +13,7 @@ diff --git a/lib/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.cpp b/lib/Co + LogicalResult matchAndRewrite( + triton::gpu::SparseDotOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { -+ RankedTensorType origType = op.getType().cast(); ++ RankedTensorType origType = cast(op.getType()); + auto origShape = origType.getShape(); + auto typeConverter = getTypeConverter(); + int numWarps = typeConverter->getNumWarps(); @@ -40,8 +41,8 @@ diff --git a/lib/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.cpp b/lib/Co + RankedTensorType::get(origShape, origType.getElementType(), dEncoding); + + // a & b must be of smem layout -+ auto aType = adaptor.getA().getType().cast(); -+ auto bType = adaptor.getB().getType().cast(); ++ auto aType = cast(adaptor.getA().getType()); ++ auto bType = cast(adaptor.getB().getType()); + Type aEltType = aType.getElementType(); + Type bEltType = bType.getElementType(); + Attribute aEncoding = aType.getEncoding(); @@ -51,14 +52,14 @@ diff --git a/lib/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.cpp b/lib/Co + Value a = adaptor.getA(); + Value b = adaptor.getB(); + Value c = adaptor.getC(); -+ if (!aEncoding.isa()) { ++ if (!isa(aEncoding)) { + Attribute encoding = triton::gpu::DotOperandEncodingAttr::get( + getContext(), 0, dEncoding, aEltType); + auto dstType = + RankedTensorType::get(aType.getShape(), aEltType, encoding); + a = rewriter.create(a.getLoc(), dstType, a); + } -+ if (!bEncoding.isa()) { ++ if (!isa(bEncoding)) { + Attribute encoding = triton::gpu::DotOperandEncodingAttr::get( + getContext(), 1, dEncoding, bEltType); + auto dstType = @@ -68,11 +69,11 @@ diff --git a/lib/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.cpp b/lib/Co + c = 
rewriter.create(c.getLoc(), retType, c); + + // aMeta must be of smem layout -+ auto aMetaType = adaptor.getAMeta().getType().cast(); ++ auto aMetaType = cast(adaptor.getAMeta().getType()); + Attribute aMetaEncoding = aMetaType.getEncoding(); + if (!aMetaEncoding) return failure(); + Value aMeta = adaptor.getAMeta(); -+ if (!aMetaEncoding.isa()) { ++ if (!isa(aMetaEncoding)) { + Attribute encoding = + triton::gpu::SparseDotMetaEncodingAttr::get(getContext(), dEncoding); + auto dstType = RankedTensorType::get( @@ -91,17 +92,17 @@ diff --git a/lib/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.cpp b/lib/Co struct TritonCatPattern : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; -@@ -550,6 +633,7 @@ void populateTritonPatterns(TritonGPUTyp - GenericOpPattern, GenericOpPattern, +@@ -553,6 +636,7 @@ void populateTritonPatterns(TritonGPUTypeConverter &typeConverter, + GenericOpPattern, GenericOpPattern, TritonFuncOpPattern>(typeConverter, context); + patterns.insert(typeConverter, context); } // -@@ -788,6 +872,12 @@ public: - IntegerAttr::get( - i32_ty, llvm::APInt(32, computeCapability.getValue()))); +@@ -794,6 +878,12 @@ public: + mod->setAttr(AttrTargetName, + StringAttr::get(context, this->target.getValue())); + // Only transform sparse dot op with undefined layout. + target.addDynamicallyLegalOp( @@ -113,9 +114,10 @@ diff --git a/lib/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.cpp b/lib/Co return signalPassFailure(); diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp +index 098ee85e4..0516fc56f 100644 --- a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp +++ b/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp -@@ -42,8 +42,9 @@ static int getMMAVersionSafe(int compute +@@ -44,8 +44,9 @@ static int getMMAVersionSafe(int computeCapability, tt::DotOp op) { return 0; } @@ -126,7 +128,7 @@ diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect auto rank = shape.size(); // Early exit for batched matmul if (rank == 3) -@@ -56,14 +57,14 @@ warpsPerTileV2(tt::DotOp dotOp, const Ar +@@ -58,8 +59,8 @@ warpsPerTileV2(tt::DotOp dotOp, const ArrayRef shape, int numWarps) { auto slices = multiRootGetSlice(dotOp, {filter}, {filter}); bool hasChainedDot = false; for (Operation *op : slices) { @@ -137,14 +139,7 @@ diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect auto resTy = chainedDot.getResult().getType(); if (resTy.getRank() != rank) { continue; - } - if (auto mmaEncoding = -- resTy.getEncoding().dyn_cast()) { -+ resTy.getEncoding().template dyn_cast()) { - return ttg::getWarpsPerCTA(mmaEncoding); - } - hasChainedDot = true; -@@ -101,12 +102,13 @@ warpsPerTileV2(tt::DotOp dotOp, const Ar +@@ -103,12 +104,13 @@ warpsPerTileV2(tt::DotOp dotOp, const ArrayRef shape, int numWarps) { return ret; } @@ -162,7 +157,7 @@ diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect slices.end()) return {(unsigned)numWarps, 1}; -@@ -175,9 +177,10 @@ public: +@@ -178,9 +180,10 @@ public: : mlir::RewritePattern(tt::DotOp::getOperationName(), 2, context), computeCapability(computeCapability) {} @@ -176,7 +171,7 @@ diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect switch (version) { case 2: return warpsPerTileV2(dotOp, shape, numWarps); -@@ -337,6 +340,98 @@ public: +@@ -335,6 +338,98 @@ public: return success(); } }; @@ -201,7 +196,7 @@ diff --git 
a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect + // Check data-types and SM compatibility + RankedTensorType oldRetType = dotOp.getType(); + if (!oldRetType.getEncoding() || -+ oldRetType.getEncoding().isa()) ++ isa(oldRetType.getEncoding())) + return failure(); + + assert(computeCapability >= 80 && @@ -216,7 +211,7 @@ diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect + + auto instrShape = + mmaVersionToInstrShape(versionMajor, retShapePerCTA, -+ a.getType().cast(), numWarps); ++ cast(a.getType()), numWarps); + auto warpsPerTile = BlockedToMMA::getWarpsPerTile( + dotOp, retShapePerCTA, versionMajor, numWarps, instrShape); + ttg::NvidiaMmaEncodingAttr mmaEnc = @@ -232,7 +227,7 @@ diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect + + if (versionMajor == 2) { + // convert A operand -+ auto oldAType = a.getType().cast(); ++ auto oldAType = cast(a.getType()); + auto newAEncoding = ttg::DotOperandEncodingAttr::get( + ctx, 0, mmaEnc, oldAType.getElementType()); + auto newAType = RankedTensorType::get( @@ -240,7 +235,7 @@ diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect + a = rewriter.create(a.getLoc(), newAType, a); + + // convert B operand -+ auto oldBType = b.getType().cast(); ++ auto oldBType = cast(b.getType()); + auto newBEncoding = ttg::DotOperandEncodingAttr::get( + ctx, 1, mmaEnc, oldBType.getElementType()); + auto newBType = RankedTensorType::get( @@ -253,7 +248,7 @@ diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect + + // convert metadata + Value meta = dotOp.getAMeta(); -+ auto oldMetaType = meta.getType().cast(); ++ auto oldMetaType = cast(meta.getType()); + auto newMetaType = RankedTensorType::get( + oldMetaType.getShape(), oldMetaType.getElementType(), + SparseDotMetaEncodingAttr::get(ctx, mmaEnc)); @@ -275,7 +270,7 @@ diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect } // namespace static Value promoteOperand(OpBuilder &builder, Location loc, Value operand, -@@ -397,6 +491,7 @@ public: +@@ -394,6 +489,7 @@ public: mlir::RewritePatternSet patterns(context); patterns.add<::BlockedToMMA>(context, computeCapability); @@ -284,33 +279,31 @@ diff --git a/lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp b/lib/Dialect signalPassFailure(); } diff --git a/lib/Dialect/TritonGPU/Transforms/Pipeliner/MatmulLoopPipeline.cpp b/lib/Dialect/TritonGPU/Transforms/Pipeliner/MatmulLoopPipeline.cpp +index 97ca6a840..f0ef124ff 100644 --- a/lib/Dialect/TritonGPU/Transforms/Pipeliner/MatmulLoopPipeline.cpp +++ b/lib/Dialect/TritonGPU/Transforms/Pipeliner/MatmulLoopPipeline.cpp -@@ -47,6 +47,10 @@ struct PipelinedOpInfo { - bool loadIsMMAV3 = false; +@@ -188,6 +188,10 @@ public: + } }; -+bool isDotOp(Operation* op) { ++static bool isDotOp(Operation* op) { + return isa(op); +} + - } // namespace - static bool isMMAv3Dot(Operation *op) { -@@ -165,22 +169,28 @@ getSharedEncIfAllUsersAreDotEnc(Value val) { + auto dot = dyn_cast(op); + if (!dot) +@@ -399,19 +403,28 @@ getSharedEncIfAllUsersAreDotEnc(Value val) { } else { if (!isa(user)) return std::nullopt; -- auto dotOpEnc = user->getResult(0) -- .getType() -- .cast() -- .getEncoding() -- .dyn_cast(); +- auto dotOpEnc = dyn_cast( +- cast(user->getResult(0).getType()).getEncoding()); - if (!dotOpEnc) + auto enc = -+ user->getResult(0).getType().cast().getEncoding(); ++ cast(user->getResult(0).getType()).getEncoding(); + if (isa(enc)) { -+ auto srcTy = val.getType().cast(); ++ auto 
srcTy = cast(val.getType()); + auto CTALayout = ttg::getCTALayout(srcTy.getEncoding()); + auto order = ttg::getOrder(srcTy.getEncoding()); + unsigned bitWidth = srcTy.getElementType().getIntOrFloatBitWidth(); @@ -321,14 +314,14 @@ diff --git a/lib/Dialect/TritonGPU/Transforms/Pipeliner/MatmulLoopPipeline.cpp b + srcTy.getElementType().getIntOrFloatBitWidth(), + /*needTrans=*/false); + } else if (isa(enc)) { -+ auto srcTy = val.getType().cast(); ++ auto srcTy = cast(val.getType()); + tempAttr = ttg::SharedEncodingAttr::get( + val.getContext(), /*vec=*/1, /*perPhase=*/1, /*maxPhase=*/1, + ttg::getOrder(srcTy.getEncoding()), + ttg::getCTALayout(srcTy.getEncoding())); + } else { return std::nullopt; -- auto srcTy = val.getType().cast(); +- auto srcTy = cast(val.getType()); - auto CTALayout = ttg::getCTALayout(srcTy.getEncoding()); - auto order = ttg::getOrder(srcTy.getEncoding()); - unsigned bitWidth = srcTy.getElementType().getIntOrFloatBitWidth(); @@ -341,71 +334,63 @@ diff --git a/lib/Dialect/TritonGPU/Transforms/Pipeliner/MatmulLoopPipeline.cpp b } // Check that the shared encodings needed by the users are compatible. if (!tempAttr || (attr != nullptr && attr != tempAttr)) -@@ -313,7 +323,7 @@ loadOpsToDistanceAndUse(scf::ForOp forOp) { +@@ -518,7 +531,7 @@ loadOpsToIndirectionLevelAndUse(scf::ForOp forOp) { }; for (Operation &op : forOp.getBody()->without_terminator()) { - if (!isa(op)) + if (!isDotOp(&op)) continue; + seen.clear(); dfs(&op, 0, &op); - } -@@ -391,7 +401,8 @@ collectOpsToPipeline(scf::ForOp forOp, - // loads. - for (auto &[loadOp, distAndUse] : loadOpToDistAndUse) { - PipelinedOpInfo loadInfo; -- if (auto dot = dyn_cast(distAndUse.second)) { -+ if (isDotOp(distAndUse.second)) { -+ auto dot = dyn_cast(distAndUse.second); - if (loadIsMMAv3(loadOp)) { +@@ -595,7 +608,8 @@ assignMemoryLayouts(llvm::SmallVector> + continue; + } + +- if (auto dot = dyn_cast(use)) { ++ if (isDotOp(use)) { ++ auto dot = dyn_cast(use); + loadInfo.usedByDot = true; + if (loadIsMMAv3(op)) { loadInfo.loadIsMMAV3 = true; - loadInfo.sharedEncoding = -@@ -410,7 +421,7 @@ collectOpsToPipeline(scf::ForOp forOp, +@@ -614,7 +628,7 @@ assignMemoryLayouts(llvm::SmallVector> // The codegen bug is caught by an assertion, so if you think you've // fixed it, feel free to delete this code and see if the assert still // fails. :) - if (!loadInfo.sharedEncoding) { + if (dot && !loadInfo.sharedEncoding) { - if (auto dotEnc = dot.getResult() - .getType() - .getEncoding() -@@ -788,7 +799,7 @@ bool mlir::triton::preProcessLoopAndGetSchedule( - int useStage = opToInfo[info.use].stage; - int numBuffers = useStage - defStage; - -- if (hasMMAV3 && isa(info.use)) { -+ if (hasMMAV3 && isDotOp(info.use)) { - // For MMAv3, we need an extra buffer as this is assumed in the wgmma - // pipelining post-processing. 
- numBuffers++; + if (auto dotEnc = dyn_cast( + dot.getResult().getType().getEncoding())) { + auto loadTy = cast(op->getResultTypes()[0]); diff --git a/lib/Dialect/TritonGPU/Transforms/ReduceDataDuplication.cpp b/lib/Dialect/TritonGPU/Transforms/ReduceDataDuplication.cpp +index 2211df31b..ee5ff44d8 100644 --- a/lib/Dialect/TritonGPU/Transforms/ReduceDataDuplication.cpp +++ b/lib/Dialect/TritonGPU/Transforms/ReduceDataDuplication.cpp -@@ -36,6 +36,10 @@ public: +@@ -37,6 +37,10 @@ public: auto srcEncoding = srcType.getEncoding(); - if (srcEncoding.isa()) + if (isa(srcEncoding)) return; -+ if (dstType.getEncoding().isa()) { ++ if (isa(dstType.getEncoding())) { + replaceSparseMetaEncoding(cvtOp); + return; + } auto dstDotOp = - dstType.getEncoding().dyn_cast(); + dyn_cast(dstType.getEncoding()); if (!dstDotOp) -@@ -74,6 +78,27 @@ public: +@@ -83,6 +87,27 @@ public: cvtOp.erase(); }); } + + private: + void replaceSparseMetaEncoding(triton::gpu::ConvertLayoutOp cvtOp) { -+ auto srcType = cvtOp.getOperand().getType().cast(); ++ auto srcType = cast(cvtOp.getOperand().getType()); + auto srcEncoding = srcType.getEncoding(); + auto sharedLayout = triton::gpu::SharedEncodingAttr::get( + cvtOp.getContext(), 8, 1, 1, triton::gpu::getOrder(srcEncoding), + triton::gpu::getCTALayout(srcEncoding)); + -+ auto dstType = cvtOp.getType().cast(); ++ auto dstType = cast(cvtOp.getType()); + auto tmpType = triton::MemDescType::get( + dstType.getShape(), dstType.getElementType(), sharedLayout); + @@ -421,6 +406,7 @@ diff --git a/lib/Dialect/TritonGPU/Transforms/ReduceDataDuplication.cpp b/lib/Di std::unique_ptr mlir::triton::gpu::createReduceDataDuplicationPass() { diff --git a/lib/Dialect/TritonNvidiaGPU/Transforms/FenceInsertion.cpp b/lib/Dialect/TritonNvidiaGPU/Transforms/FenceInsertion.cpp +index f456d36a6..a1dac2b72 100644 --- a/lib/Dialect/TritonNvidiaGPU/Transforms/FenceInsertion.cpp +++ b/lib/Dialect/TritonNvidiaGPU/Transforms/FenceInsertion.cpp @@ -45,7 +45,7 @@ public: @@ -432,7 +418,7 @@ diff --git a/lib/Dialect/TritonNvidiaGPU/Transforms/FenceInsertion.cpp b/lib/Dia return WalkResult::advance(); OpBuilder builder(op); auto a = op->getOperand(0); -@@ -83,7 +83,7 @@ private: +@@ -80,7 +80,7 @@ private: static DenseSet> trace; auto op = operand.getDefiningOp(); // avoid redundant insertion diff --git a/third_party/xla/xla/service/gpu/ir_emitter_triton_cuda.cc b/third_party/xla/xla/service/gpu/ir_emitter_triton_cuda.cc index 2ad76a6391226c..0606d115ced97f 100644 --- a/third_party/xla/xla/service/gpu/ir_emitter_triton_cuda.cc +++ b/third_party/xla/xla/service/gpu/ir_emitter_triton_cuda.cc @@ -18,6 +18,7 @@ limitations under the License. 
#include "nvidia/include/NVGPUToLLVM/NVGPUToLLVMPass.h" #include "nvidia/include/TritonNVIDIAGPUToLLVM/Passes.h" #include "absl/status/status.h" +#include "absl/strings/str_format.h" #include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h" // from @llvm-project #include "mlir/Conversion/IndexToLLVM/IndexToLLVM.h" // from @llvm-project #include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h" // from @llvm-project @@ -60,7 +61,8 @@ absl::Status CreateTritonPipeline( // Based on make_ttgir() in // @triton//:third_party/nvidia/backend/compiler.py pm.addPass(mt::createConvertTritonToTritonGPUPass( - config.num_warps, threadsPerWarp, config.num_ctas, ccAsInt)); + absl::StrFormat("cuda:%u", ccAsInt), config.num_warps, threadsPerWarp, + config.num_ctas)); pm.addPass(mt::gpu::createCoalescePass()); if (ccCuda.IsAtLeastAmpere()) { pm.addPass(mt::gpu::createF32DotTCPass()); @@ -70,7 +72,7 @@ absl::Status CreateTritonPipeline( pm.addPass(mt::gpu::createOptimizeThreadLocalityPass()); pm.addPass(mt::gpu::createAccelerateMatmulPass(ccAsInt)); pm.addPass(mt::gpu::createRemoveLayoutConversionsPass()); - pm.addPass(mt::gpu::createOptimizeDotOperandsPass()); + pm.addPass(mt::gpu::createOptimizeDotOperandsPass(ccCuda.IsAtLeastAmpere())); pm.addPass(mlir::createCSEPass()); pm.addPass(mt::gpu::createPipelinePass(config.num_stages, config.num_warps, @@ -80,7 +82,7 @@ absl::Status CreateTritonPipeline( pm.addPass(mt::gpu::createPrefetchPass()); } - pm.addPass(mt::gpu::createOptimizeDotOperandsPass()); + pm.addPass(mt::gpu::createOptimizeDotOperandsPass(ccCuda.IsAtLeastAmpere())); // We need to disable this pass because it undoes the hoisting of dot_operand // layout conversion done in // triton/lib/Dialect/TritonGPU/Transforms/OptimizeDotOperands.cpp in @@ -93,6 +95,7 @@ absl::Status CreateTritonPipeline( pm.addPass(mlir::createSymbolDCEPass()); if (ccCuda.IsAtLeastHopper()) { pm.addPass(mlir::createTritonNvidiaGPUFenceInsertionPass(ccAsInt)); + pm.addPass(mlir::createTritonNvidiaGPUTMALoweringPass()); } pm.addPass(mlir::createCanonicalizerPass()); diff --git a/third_party/xla/xla/service/gpu/tests/sparse_add_layout.mlir b/third_party/xla/xla/service/gpu/tests/sparse_add_layout.mlir index 3d1e42b46c80d6..55faf1b76bda3d 100644 --- a/third_party/xla/xla/service/gpu/tests/sparse_add_layout.mlir +++ b/third_party/xla/xla/service/gpu/tests/sparse_add_layout.mlir @@ -1,7 +1,7 @@ -// RUN: triton-opt %s -split-input-file -convert-triton-to-tritongpu | FileCheck %s +// RUN: triton-opt %s -split-input-file -convert-triton-to-tritongpu='target=cuda:80 num-warps=4' | FileCheck %s // CHECK-COUNT-4: #triton_gpu.blocked -module attributes {"triton_gpu.num-warps" = 4 : i32} { +module { tt.func @sparse_dot() { %A = arith.constant dense<1.00e+00> : tensor<64x32xf16> %meta = arith.constant dense<0x3333> : tensor<64x4xi16> From 322831ccc5a8fef9a98ab068e6fc77ed46a25f37 Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Tue, 14 May 2024 07:04:41 -0700 Subject: [PATCH 112/478] Avoid FindIndex function by using the inverse permutation. Let p be a permutation. FindIndex(p, x) should return y where x = p[y]. This is equivalent to p^-1[x] = p^-1[p[y]] = y, so y = p^-1[x]. 
PiperOrigin-RevId: 633572618 --- .../xla/xla/service/layout_normalization.cc | 23 +++++++++++-------- .../xla/service/layout_normalization_test.cc | 18 +++++++++++++++ 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/third_party/xla/xla/service/layout_normalization.cc b/third_party/xla/xla/service/layout_normalization.cc index 6837437f8f93e2..9974feb9541724 100644 --- a/third_party/xla/xla/service/layout_normalization.cc +++ b/third_party/xla/xla/service/layout_normalization.cc @@ -156,7 +156,7 @@ class LayoutNormalizationVisitor : public DfsHloRewriteVisitor { auto normalized_shape = Normalize(s); auto layout_as_permutation = ToTransposeDimensions(s.layout()); int64_t normalized_concat_dim = - FindIndex(layout_as_permutation, orig_concat_dim); + InversePermutation(layout_as_permutation)[orig_concat_dim]; auto normalized_concat = hlo->AddInstruction(HloInstruction::CreateConcatenate( normalized_shape, normalized_inputs, normalized_concat_dim)); @@ -224,10 +224,12 @@ class LayoutNormalizationVisitor : public DfsHloRewriteVisitor { ToTransposeDimensions(s.layout()); std::vector br_dimensions; if (!hlo->dimensions().empty()) { - br_dimensions = Permute(hlo->dimensions(), layout_as_permutation); - } - for (int64_t& d : br_dimensions) { - d = FindIndex(orig_output_layout_as_permutation, d); + br_dimensions.reserve(hlo->dimensions().size()); + auto inverse_perm = InversePermutation(orig_output_layout_as_permutation); + for (int64_t dim : + ComposePermutations(hlo->dimensions(), layout_as_permutation)) { + br_dimensions.push_back(inverse_perm[dim]); + } } auto normalized_broadcast = MakeBroadcastHlo( normalized_input, br_dimensions, normalized_shape, &hlo->metadata()); @@ -244,9 +246,8 @@ class LayoutNormalizationVisitor : public DfsHloRewriteVisitor { auto normalized_shape = Normalize(s); std::vector orig_output_layout_as_permutation = ToTransposeDimensions(s.layout()); - int64_t iota_dimension = hlo->dimensions()[0]; - int64_t new_iota_dimension = - FindIndex(orig_output_layout_as_permutation, iota_dimension); + int64_t new_iota_dimension = InversePermutation( + orig_output_layout_as_permutation)[hlo->dimensions()[0]]; auto normalized_iota = hlo->AddInstruction( HloInstruction::CreateIota(normalized_shape, new_iota_dimension)); SetVisited(*normalized_iota); @@ -468,8 +469,9 @@ class LayoutNormalizationVisitor : public DfsHloRewriteVisitor { ToTransposeDimensions(hlo->shape().layout()); std::vector new_dimensions; new_dimensions.reserve(hlo->dimensions().size()); + auto inverse_perm = InversePermutation(layout_as_permutation); for (int64_t dim : hlo->dimensions()) { - new_dimensions.push_back(FindIndex(layout_as_permutation, dim)); + new_dimensions.push_back(inverse_perm[dim]); } absl::c_sort(new_dimensions); auto normalized_reverse = hlo->AddInstruction( @@ -499,8 +501,9 @@ class LayoutNormalizationVisitor : public DfsHloRewriteVisitor { new_padding.add_dimensions(); } + auto inverse_perm = InversePermutation(layout_as_permutation); for (int dim = 0; dim < s.dimensions_size(); dim++) { - int tr_dim = static_cast(FindIndex(layout_as_permutation, dim)); + int tr_dim = static_cast(inverse_perm[dim]); *new_padding.mutable_dimensions(tr_dim) = padded_config.dimensions(dim); } diff --git a/third_party/xla/xla/service/layout_normalization_test.cc b/third_party/xla/xla/service/layout_normalization_test.cc index dd528096ffa312..6c5da347cb1d58 100644 --- a/third_party/xla/xla/service/layout_normalization_test.cc +++ b/third_party/xla/xla/service/layout_normalization_test.cc @@ -235,6 
+235,24 @@ ENTRY main { )"); } +TEST_F(LayoutNormalizationTest, BroadcastOperandLayoutNotInverseOfItself) { + const char* hlo = R"( +HloModule module + +ENTRY main { + a = f32[4,3,5]{0,2,1} parameter(0) + b = f32[4,3,2,5]{0,1,2,3} broadcast(a), dimensions={0,1,3} + ROOT out = abs(b) +} +)"; + + CheckLayoutNormalization(hlo, R"( +// CHECK: [[bitcast_1:%[^ ]+]] = f32[3,5,4]{2,1,0} bitcast +// CHECK: [[broadcast_0:%[^ ]+]] = f32[5,2,3,4]{3,2,1,0} broadcast([[bitcast_1]]), dimensions={2,0,3} +// CHECK: [[abs_2:%[^ ]+]] = f32[5,2,3,4]{3,2,1,0} abs([[broadcast_0]]) +)"); +} + TEST_F(LayoutNormalizationTest, BroadcastCustomOutputLayout) { const char* hlo = R"( HloModule module From ece868ec01922cc9faf3c5746ce4dbc67100d3de Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 May 2024 07:18:11 -0700 Subject: [PATCH 113/478] Update ops-related pbtxt files. PiperOrigin-RevId: 633575662 --- tensorflow/core/ops/ops.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 7322cef85415f5..5019fe2a5b2fad 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -1,4 +1,4 @@ -go/debugstr +go/debugproto op { name: "Abort" attr { From 9e2a14d27e834aef4f46bd38b849801df64f6cb1 Mon Sep 17 00:00:00 2001 From: Greg Olechwierowicz Date: Tue, 14 May 2024 07:25:41 -0700 Subject: [PATCH 114/478] [XLA:GPU] Add utility functions to iterate over instructions matching opcode. PiperOrigin-RevId: 633577273 --- third_party/xla/xla/hlo/utils/BUILD | 16 +++ third_party/xla/xla/hlo/utils/hlo_query.h | 20 ++++ .../xla/xla/hlo/utils/hlo_query_test.cc | 113 ++++++++++++++++++ 3 files changed, 149 insertions(+) create mode 100644 third_party/xla/xla/hlo/utils/hlo_query_test.cc diff --git a/third_party/xla/xla/hlo/utils/BUILD b/third_party/xla/xla/hlo/utils/BUILD index 5b08563b2d483b..39153f7e1eda67 100644 --- a/third_party/xla/xla/hlo/utils/BUILD +++ b/third_party/xla/xla/hlo/utils/BUILD @@ -154,3 +154,19 @@ cc_library( "@com_google_absl//absl/container:flat_hash_set", ], ) + +xla_cc_test( + name = "hlo_query_test", + srcs = [ + "hlo_query_test.cc", + ], + deps = [ + ":hlo_query", + "//xla/hlo/ir:hlo", + "//xla/service:hlo_parser", + "//xla/tests:hlo_test_base", + "@com_google_absl//absl/strings:string_view", + "@com_google_googletest//:gtest_main", + "@local_tsl//tsl/platform:statusor", + ], +) diff --git a/third_party/xla/xla/hlo/utils/hlo_query.h b/third_party/xla/xla/hlo/utils/hlo_query.h index e12e3306651375..8343df4dc24472 100644 --- a/third_party/xla/xla/hlo/utils/hlo_query.h +++ b/third_party/xla/xla/hlo/utils/hlo_query.h @@ -78,6 +78,26 @@ bool IsBroadcastOfParameter(const HloInstruction& instr); HloInstruction* GetFirstInstructionWithOpcode(const HloComputation& computation, HloOpcode opcode); +// Applies `fn` to a collection of instruction for a given `computation`. +template +void ForEachInstructionWithOpcode(HloComputation& computation, HloOpcode opcode, + Fn&& fn) { + for (HloInstruction* instr : computation.instructions()) { + if (instr->opcode() == opcode) { + fn(instr); + } + } +} + +// Applies `fn` to a collection of instruction for a given `module`. +template +void ForEachInstructionWithOpcode(HloModule& module, HloOpcode opcode, + Fn&& fn) { + for (HloComputation* computation : module.computations()) { + ForEachInstructionWithOpcode(*computation, opcode, fn); + } +} + // Determines whether the given computation contains an instruction with one of // the given opcodes. 
Checks both comp's instructions and the instructions of // any computations nested within it. diff --git a/third_party/xla/xla/hlo/utils/hlo_query_test.cc b/third_party/xla/xla/hlo/utils/hlo_query_test.cc new file mode 100644 index 00000000000000..7697bffc855806 --- /dev/null +++ b/third_party/xla/xla/hlo/utils/hlo_query_test.cc @@ -0,0 +1,113 @@ +/* Copyright 2024 The OpenXLA Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "xla/hlo/utils/hlo_query.h" + +#include + +#include +#include "absl/strings/string_view.h" +#include "xla/hlo/ir/hlo_computation.h" +#include "xla/hlo/ir/hlo_module.h" +#include "xla/hlo/ir/hlo_opcode.h" +#include "xla/service/hlo_parser.h" +#include "xla/tests/hlo_test_base.h" +#include "tsl/platform/statusor.h" + +namespace xla { +namespace { + +using HloQueryTest = HloTestBase; + +template +int CountInstructions(Hlo& module, HloOpcode opcode) { + int counter = 0; + hlo_query::ForEachInstructionWithOpcode( + module, opcode, [&counter](auto& instr) { counter++; }); + return counter; +} + +TEST_F(HloQueryTest, + GetInstructionWithOpCodeReturnsMatchingInstructionForModule) { + constexpr absl::string_view kHloString = R"( +HloModule m + +computation.0 { + param.0 = f32[32]{0} parameter(0) + ROOT _ = f32[32]{0} rsqrt(param.0) +} + +ENTRY main { + param.0 = f32[32]{0} parameter(0) + param.1 = f32[32]{0} parameter(1) + param.2 = f32[32]{0} parameter(2) + param.3 = f32[32]{0} parameter(3) + add.0 = f32[32]{0} add(param.0,param.1) + add.1 = f32[32]{0} add(param.1,param.2) + sub.0 = f32[32]{0} subtract(param.0,param.1) + mul.0 = f32[32]{0} multiply(param.0,param.1) + mul.1 = f32[32]{0} multiply(param.1,param.2) + mul.2 = f32[32]{0} multiply(param.2,param.3) + comp.0 = call(param.0), to_apply=computation.0 + ROOT _ = (f32[32],f32[32],f32[32],f32[32],f32[32],f32[32],f32[32]) tuple(comp.0,add.0,add.1,sub.0,mul.0,mul.1,mul.2) +})"; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + ParseAndReturnUnverifiedModule(kHloString)); + EXPECT_EQ(CountInstructions(*module, HloOpcode::kAdd), 2); + EXPECT_EQ(CountInstructions(*module, HloOpcode::kSubtract), 1); + EXPECT_EQ(CountInstructions(*module, HloOpcode::kMultiply), 3); +} + +TEST_F(HloQueryTest, + GetInstructionWithOpCodeReturnsMatchingInstructionForComputation) { + constexpr absl::string_view kHloString = R"( +HloModule m + +computation.0 { + param.0 = f32[32]{0} parameter(0) + param.1 = f32[32]{0} parameter(1) + param.2 = f32[32]{0} parameter(2) + param.3 = f32[32]{0} parameter(3) + add.0 = f32[32]{0} add(param.0,param.1) + add.1 = f32[32]{0} add(param.1,param.2) + sub.0 = f32[32]{0} subtract(param.0,param.1) + mul.0 = f32[32]{0} multiply(param.0,param.1) + mul.1 = f32[32]{0} multiply(param.1,param.2) + ROOT mul.2 = f32[32]{0} multiply(param.2,param.3) +} + +ENTRY main { + param.0 = f32[32]{0} parameter(0) + param.1 = f32[32]{0} parameter(1) + param.2 = f32[32]{0} parameter(2) + param.3 = f32[32]{0} parameter(3) + add.0 = f32[32]{0} add(param.0,param.1) 
+  sub.0 = f32[32]{0} subtract(param.0,param.1)
+  mul.0 = f32[32]{0} multiply(param.0,param.1)
+  comp.0 = f32[32]{0} call(param.0,param.1,param.2), to_apply=computation.0
+  ROOT _ = (f32[32],f32[32],f32[32],f32[32]) tuple(add.0,sub.0,mul.0,comp.0)
+})";
+
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
+                          ParseAndReturnUnverifiedModule(kHloString));
+  HloComputation* computation = module->GetComputationWithName("computation.0");
+  EXPECT_EQ(CountInstructions(*computation, HloOpcode::kAdd), 2);
+  EXPECT_EQ(CountInstructions(*computation, HloOpcode::kSubtract), 1);
+  EXPECT_EQ(CountInstructions(*computation, HloOpcode::kMultiply), 3);
+}
+
+}  // namespace
+}  // namespace xla

From a26c03d30732a3c17b2a18dd7156e26920d1cf61 Mon Sep 17 00:00:00 2001
From: Ilia Sergachev
Date: Tue, 14 May 2024 08:22:56 -0700
Subject: [PATCH 115/478] PR #12433: [GPU] Make cuDNN fusion test run on H100.

Imported from GitHub PR https://github.com/openxla/xla/pull/12433

Copybara import of the project:

--
856f1ddb05b0cecf1092aa3547611274b27f0796 by Ilia Sergachev :

[GPU] Make cuDNN fusion test run on H100.

Merging this change closes #12433

PiperOrigin-RevId: 633592263
---
 third_party/xla/xla/service/gpu/fusions/BUILD | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/third_party/xla/xla/service/gpu/fusions/BUILD b/third_party/xla/xla/service/gpu/fusions/BUILD
index 2e0a1202157486..1b1d9c8a5ef0e5 100644
--- a/third_party/xla/xla/service/gpu/fusions/BUILD
+++ b/third_party/xla/xla/service/gpu/fusions/BUILD
@@ -665,7 +665,7 @@ xla_test(
     name = "cudnn_test",
     srcs = if_cuda_is_configured(["cudnn_test.cc"]),
     backend_tags = {"gpu": [
-        "requires-gpu-sm80",
+        "requires-gpu-sm90",
     ]},
     backends = [
         "gpu",

From d030362628a694b8b499e4e7d9ed835267be43d4 Mon Sep 17 00:00:00 2001
From: Kyle Lucke
Date: Tue, 14 May 2024 08:59:27 -0700
Subject: [PATCH 116/478] Lift Kernel::Create and TypedKernel::Create methods
 out into separate factory objects to remove circular dependencies.

Existing Create methods are left in place until all uses are eliminated.
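The call-site migration this sets up is mechanical. A minimal sketch of the
before/after (the `executor` and `spec` variables here are hypothetical
placeholders, not part of this patch):

    // Before: creation via a static method on Kernel itself, which ties
    // the kernel types to StreamExecutor and creates a dependency cycle.
    TF_ASSIGN_OR_RETURN(std::unique_ptr<se::Kernel> kernel,
                        se::Kernel::Create(executor, spec));

    // After: creation goes through the standalone factory instead.
    TF_ASSIGN_OR_RETURN(std::unique_ptr<se::Kernel> kernel,
                        se::KernelFactory::Create(executor, spec));

Typed kernels migrate the same way, from TypedKernel<...>::Create to
TypedKernelFactory<...>::Create, as the test updates below show.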
PiperOrigin-RevId: 633601919 --- third_party/xla/xla/service/gpu/BUILD | 1 + third_party/xla/xla/service/gpu/kernels/BUILD | 3 + .../cutlass_gemm_custom_kernel_benchmarks.cc | 4 +- .../cutlass_gemm_custom_kernel_test.cc | 7 +- .../gpu/kernels/topk_custom_kernel_test.cc | 7 +- third_party/xla/xla/service/gpu/runtime/BUILD | 5 +- .../service/gpu/runtime/command_buffer_cmd.cc | 3 +- .../xla/service/gpu/runtime/kernel_thunk.cc | 7 +- .../xla/service/gpu/stream_executor_util.cc | 3 +- third_party/xla/xla/stream_executor/BUILD | 30 ++++++ third_party/xla/xla/stream_executor/gpu/BUILD | 2 + .../gpu/gpu_command_buffer_test.cc | 15 +-- .../stream_executor/gpu/gpu_kernel_test.cc | 6 +- third_party/xla/xla/stream_executor/kernel.h | 9 ++ .../xla/xla/stream_executor/kernel_factory.h | 44 +++++++++ .../stream_executor/typed_kernel_factory.h | 95 +++++++++++++++++++ 16 files changed, 221 insertions(+), 20 deletions(-) create mode 100644 third_party/xla/xla/stream_executor/kernel_factory.h create mode 100644 third_party/xla/xla/stream_executor/typed_kernel_factory.h diff --git a/third_party/xla/xla/service/gpu/BUILD b/third_party/xla/xla/service/gpu/BUILD index 223aa418be67e7..91776ad61dee01 100644 --- a/third_party/xla/xla/service/gpu/BUILD +++ b/third_party/xla/xla/service/gpu/BUILD @@ -4364,6 +4364,7 @@ cc_library( "//xla/hlo/ir:hlo", "//xla/service:hlo_module_config", "//xla/stream_executor", + "//xla/stream_executor:kernel_factory", "//xla/stream_executor:launch_dim", "//xla/tsl/util:env_var", "//xla/tsl/util/proto:proto_utils", diff --git a/third_party/xla/xla/service/gpu/kernels/BUILD b/third_party/xla/xla/service/gpu/kernels/BUILD index 8d950a052f2aef..fc9c7ec54b2aff 100644 --- a/third_party/xla/xla/service/gpu/kernels/BUILD +++ b/third_party/xla/xla/service/gpu/kernels/BUILD @@ -232,6 +232,7 @@ xla_test( "//xla:xla_data_proto_cc", "//xla/service:platform_util", "//xla/stream_executor", + "//xla/stream_executor:kernel_factory", "//xla/stream_executor:platform", "//xla/stream_executor:platform_manager", "//xla/stream_executor/cuda:cuda_platform", @@ -281,6 +282,7 @@ xla_test( ":cutlass_gemm_custom_kernel", "//xla:xla_data_proto_cc", "//xla/stream_executor", + "//xla/stream_executor:kernel_factory", "//xla/stream_executor:platform", "//xla/stream_executor:platform_manager", "//xla/stream_executor/cuda:cuda_platform", @@ -301,6 +303,7 @@ cc_binary( "//xla:xla_data_proto_cc", "//xla/service:gpu_plugin", "//xla/stream_executor", + "//xla/stream_executor:kernel_factory", "//xla/stream_executor:platform", "//xla/stream_executor:platform_manager", "//xla/stream_executor/cuda:cuda_platform", diff --git a/third_party/xla/xla/service/gpu/kernels/cutlass_gemm_custom_kernel_benchmarks.cc b/third_party/xla/xla/service/gpu/kernels/cutlass_gemm_custom_kernel_benchmarks.cc index e2f7bccea9fe57..b843fac733154c 100644 --- a/third_party/xla/xla/service/gpu/kernels/cutlass_gemm_custom_kernel_benchmarks.cc +++ b/third_party/xla/xla/service/gpu/kernels/cutlass_gemm_custom_kernel_benchmarks.cc @@ -20,6 +20,7 @@ limitations under the License. 
#include "xla/service/gpu/kernels/cutlass_gemm_custom_kernel.h" #include "xla/stream_executor/device_description.h" #include "xla/stream_executor/kernel.h" +#include "xla/stream_executor/kernel_factory.h" #include "xla/stream_executor/platform.h" #include "xla/stream_executor/platform_manager.h" #include "xla/stream_executor/stream.h" @@ -56,7 +57,8 @@ static void BM_RowMajorGemm(benchmark::State& state) { /*indices=*/{0, 1, 2}, /*slices=*/{}, device); TF_ASSERT_OK_AND_ASSIGN( - auto gemm, se::Kernel::Create(executor, custom_kernel->kernel_spec())); + auto gemm, + se::KernelFactory::Create(executor, custom_kernel->kernel_spec())); // Prepare arguments: a=1.1, b=1.2, c=0.0 se::DeviceMemory a = executor->AllocateArray(m * k, 0); diff --git a/third_party/xla/xla/service/gpu/kernels/cutlass_gemm_custom_kernel_test.cc b/third_party/xla/xla/service/gpu/kernels/cutlass_gemm_custom_kernel_test.cc index 3748ed5251564e..b566fceab8511c 100644 --- a/third_party/xla/xla/service/gpu/kernels/cutlass_gemm_custom_kernel_test.cc +++ b/third_party/xla/xla/service/gpu/kernels/cutlass_gemm_custom_kernel_test.cc @@ -21,6 +21,7 @@ limitations under the License. #include #include "xla/stream_executor/kernel.h" +#include "xla/stream_executor/kernel_factory.h" #include "xla/stream_executor/platform.h" #include "xla/stream_executor/platform_manager.h" #include "xla/stream_executor/stream.h" @@ -46,7 +47,8 @@ TEST(CutlassGemmKernelTest, SimpleGemm) { /*indices=*/{0, 1, 2}, /*slices=*/{}, executor->GetDeviceDescription()); TF_ASSERT_OK_AND_ASSIGN( - auto gemm, se::Kernel::Create(executor, custom_kernel->kernel_spec())); + auto gemm, + se::KernelFactory::Create(executor, custom_kernel->kernel_spec())); int64_t length = 4 * 4; int64_t byte_length = sizeof(float) * length; @@ -96,7 +98,8 @@ TEST(CutlassGemmKernelTest, LoadFromSharedLibrary) { /*indices=*/{0, 1, 2}, /*slices=*/{}, executor->GetDeviceDescription()); TF_ASSERT_OK_AND_ASSIGN( - auto gemm, se::Kernel::Create(executor, custom_kernel->kernel_spec())); + auto gemm, + se::KernelFactory::Create(executor, custom_kernel->kernel_spec())); int64_t length = 4 * 4; int64_t byte_length = sizeof(float) * length; diff --git a/third_party/xla/xla/service/gpu/kernels/topk_custom_kernel_test.cc b/third_party/xla/xla/service/gpu/kernels/topk_custom_kernel_test.cc index d50fd054df2ac3..b083118d0a8d7c 100644 --- a/third_party/xla/xla/service/gpu/kernels/topk_custom_kernel_test.cc +++ b/third_party/xla/xla/service/gpu/kernels/topk_custom_kernel_test.cc @@ -28,6 +28,7 @@ limitations under the License. #include "absl/strings/substitute.h" #include "xla/service/platform_util.h" #include "xla/stream_executor/kernel.h" +#include "xla/stream_executor/kernel_factory.h" #include "xla/stream_executor/platform.h" #include "xla/stream_executor/platform_manager.h" #include "xla/stream_executor/stream.h" @@ -111,7 +112,8 @@ TEST_P(TopKKernelTest, TopKFloat) { GetTopKKernel("topk", PrimitiveType::F32, n, k, batch_size); TF_ASSERT_OK_AND_ASSIGN( - auto kernel, se::Kernel::Create(executor, custom_kernel->kernel_spec())); + auto kernel, + se::KernelFactory::Create(executor, custom_kernel->kernel_spec())); // Launch topk kernel with device memory arguments. 
   se::KernelArgsDeviceMemoryArray arr(
@@ -165,7 +167,8 @@ TEST_P(TopKKernelTest, TopKPackedNegative) {
       GetTopKKernel("topk", PrimitiveType::F32, n, k, batch_size);

   TF_ASSERT_OK_AND_ASSIGN(
-      auto kernel, se::Kernel::Create(executor, custom_kernel->kernel_spec()));
+      auto kernel,
+      se::KernelFactory::Create(executor, custom_kernel->kernel_spec()));

   // Launch topk kernel with device memory arguments.
   se::KernelArgsDeviceMemoryArray arr(
diff --git a/third_party/xla/xla/service/gpu/runtime/BUILD b/third_party/xla/xla/service/gpu/runtime/BUILD
index c259a044cb8308..4175990422e7bb 100644
--- a/third_party/xla/xla/service/gpu/runtime/BUILD
+++ b/third_party/xla/xla/service/gpu/runtime/BUILD
@@ -85,6 +85,7 @@ cc_library(
         "//xla/service/gpu/kernels:custom_kernel",
         "//xla/service/gpu/runtime:thunk",
         "//xla/stream_executor",
+        "//xla/stream_executor:kernel_factory",
        "//xla/stream_executor:trace_command_buffer_factory",
        "//xla/stream_executor/gpu:gpu_stream_header",
        "//xla/stream_executor/gpu:gpu_types_header",
@@ -712,6 +713,7 @@ cc_library(
     srcs = ["kernel_thunk.cc"],
     hdrs = ["kernel_thunk.h"],
     deps = [
+        ":thunk",
         "//xla:status",
         "//xla:types",
         "//xla/hlo/ir:hlo",
@@ -720,8 +722,8 @@ cc_library(
         "//xla/service/gpu:launch_dimensions",
         "//xla/service/gpu:stream_executor_util",
         "//xla/service/gpu/kernels:custom_kernel",
-        "//xla/service/gpu/runtime:thunk",
         "//xla/stream_executor",
+        "//xla/stream_executor:kernel_factory",
         "@com_google_absl//absl/base:core_headers",
         "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/container:inlined_vector",
@@ -729,7 +731,6 @@ cc_library(
         "@com_google_absl//absl/strings:str_format",
         "@com_google_absl//absl/synchronization",
         "@com_google_absl//absl/types:span",
-        "@llvm-project//mlir:IR",
         "@local_tsl//tsl/platform:errors",
         "@local_tsl//tsl/platform:logging",
         "@local_tsl//tsl/platform:statusor",
diff --git a/third_party/xla/xla/service/gpu/runtime/command_buffer_cmd.cc b/third_party/xla/xla/service/gpu/runtime/command_buffer_cmd.cc
index 5a6263fe3ef557..75a5d704c33bff 100644
--- a/third_party/xla/xla/service/gpu/runtime/command_buffer_cmd.cc
+++ b/third_party/xla/xla/service/gpu/runtime/command_buffer_cmd.cc
@@ -63,6 +63,7 @@ limitations under the License.
 #include "xla/stream_executor/device_memory.h"
 #include "xla/stream_executor/dnn.h"
 #include "xla/stream_executor/kernel.h"
+#include "xla/stream_executor/kernel_factory.h"
 #include "xla/stream_executor/launch_dim.h"
 #include "xla/stream_executor/stream.h"
 #include "xla/stream_executor/stream_executor.h"
@@ -670,7 +671,7 @@ absl::Status CustomKernelLaunchCmd::Initialize(

   TF_ASSIGN_OR_RETURN(
       std::unique_ptr<se::Kernel> kernel,
-      se::Kernel::Create(params.executor, custom_kernel_.kernel_spec()));
+      se::KernelFactory::Create(params.executor, custom_kernel_.kernel_spec()));

   absl::MutexLock lock(&mutex_);
   kernels_.emplace(params.executor, std::move(kernel));
diff --git a/third_party/xla/xla/service/gpu/runtime/kernel_thunk.cc b/third_party/xla/xla/service/gpu/runtime/kernel_thunk.cc
index 063940d8ab7cd4..e5cca1703a8e1d 100644
--- a/third_party/xla/xla/service/gpu/runtime/kernel_thunk.cc
+++ b/third_party/xla/xla/service/gpu/runtime/kernel_thunk.cc
@@ -38,6 +38,7 @@ limitations under the License.
#include "xla/status.h" #include "xla/stream_executor/device_memory.h" #include "xla/stream_executor/kernel.h" +#include "xla/stream_executor/kernel_factory.h" #include "xla/stream_executor/launch_dim.h" #include "xla/stream_executor/stream_executor.h" #include "tsl/platform/errors.h" @@ -188,9 +189,9 @@ absl::Status CustomKernelThunk::Initialize(const InitializeParams& params) { auto it = kernel_cache_.find(params.executor); if (kernel_cache_.end() == it) { - TF_ASSIGN_OR_RETURN( - std::unique_ptr kernel, - se::Kernel::Create(params.executor, custom_kernel_.kernel_spec())); + TF_ASSIGN_OR_RETURN(std::unique_ptr kernel, + se::KernelFactory::Create( + params.executor, custom_kernel_.kernel_spec())); kernel_cache_.emplace(params.executor, std::move(kernel)); } diff --git a/third_party/xla/xla/service/gpu/stream_executor_util.cc b/third_party/xla/xla/service/gpu/stream_executor_util.cc index 221b36c95683d2..8760faa725bcef 100644 --- a/third_party/xla/xla/service/gpu/stream_executor_util.cc +++ b/third_party/xla/xla/service/gpu/stream_executor_util.cc @@ -52,6 +52,7 @@ limitations under the License. #include "xla/stream_executor/device_memory.h" #include "xla/stream_executor/dnn.h" #include "xla/stream_executor/kernel.h" +#include "xla/stream_executor/kernel_factory.h" #include "xla/stream_executor/kernel_spec.h" #include "xla/stream_executor/launch_dim.h" #include "xla/stream_executor/platform.h" @@ -368,7 +369,7 @@ absl::StatusOr> CreateKernel( } TF_ASSIGN_OR_RETURN(std::unique_ptr kernel, - se::Kernel::Create(stream_exec, loader_spec)); + se::KernelFactory::Create(stream_exec, loader_spec)); se::KernelMetadata m; m.set_shared_memory_bytes(shared_mem_bytes); diff --git a/third_party/xla/xla/stream_executor/BUILD b/third_party/xla/xla/stream_executor/BUILD index b73e82b9e42607..3899b903553264 100644 --- a/third_party/xla/xla/stream_executor/BUILD +++ b/third_party/xla/xla/stream_executor/BUILD @@ -607,6 +607,7 @@ cc_library( ":kernel_spec", ":platform", ":stream_executor_headers", + "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/container:inlined_vector", "@com_google_absl//absl/meta:type_traits", "@com_google_absl//absl/status", @@ -620,6 +621,35 @@ cc_library( ], ) +cc_library( + name = "kernel_factory", + hdrs = ["kernel_factory.h"], + deps = [ + ":kernel", + ":kernel_spec", + ":stream_executor_interface", + "@com_google_absl//absl/status:statusor", + "@local_tsl//tsl/platform:errors", + "@local_tsl//tsl/platform:statusor", + ], +) + +cc_library( + name = "typed_kernel_factory", + hdrs = ["typed_kernel_factory.h"], + deps = [ + ":kernel", + ":kernel_factory", + ":kernel_spec", + ":stream_executor_interface", + "@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/strings:string_view", + "@com_google_absl//absl/types:span", + "@local_tsl//tsl/platform:errors", + "@local_tsl//tsl/platform:statusor", + ], +) + cc_library( name = "scratch_allocator", hdrs = ["scratch_allocator.h"], diff --git a/third_party/xla/xla/stream_executor/gpu/BUILD b/third_party/xla/xla/stream_executor/gpu/BUILD index 19b33618b223d3..8e70a01a43a6ed 100644 --- a/third_party/xla/xla/stream_executor/gpu/BUILD +++ b/third_party/xla/xla/stream_executor/gpu/BUILD @@ -668,6 +668,7 @@ xla_test( "//xla/stream_executor", "//xla/stream_executor:platform", "//xla/stream_executor:platform_manager", + "//xla/stream_executor:typed_kernel_factory", "@com_google_absl//absl/strings", "@com_google_googletest//:gtest", "@local_tsl//tsl/lib/core:status_test_util", @@ -695,6 +696,7 @@ xla_test( 
"//xla/stream_executor:platform", "//xla/stream_executor:platform_manager", "//xla/stream_executor:trace_command_buffer_factory", + "//xla/stream_executor:typed_kernel_factory", "//xla/stream_executor/gpu:gpu_driver_header", "@com_google_absl//absl/log:check", "@com_google_absl//absl/status", diff --git a/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer_test.cc b/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer_test.cc index d1999c819f9927..14c5897118ead7 100644 --- a/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer_test.cc +++ b/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer_test.cc @@ -36,6 +36,7 @@ limitations under the License. #include "xla/stream_executor/stream.h" #include "xla/stream_executor/stream_executor.h" #include "xla/stream_executor/trace_command_buffer_factory.h" +#include "xla/stream_executor/typed_kernel_factory.h" #include "tsl/lib/core/status_test_util.h" #include "tsl/platform/errors.h" #include "tsl/platform/status.h" @@ -67,14 +68,16 @@ static MultiKernelLoaderSpec GetAddI32KernelSpec() { return spec; } -using AddI32Kernel = TypedKernel, DeviceMemory, - DeviceMemory>; -using MulI32Kernel = TypedKernel, DeviceMemory, - DeviceMemory>; +using AddI32Kernel = + TypedKernelFactory, DeviceMemory, + DeviceMemory>; +using MulI32Kernel = + TypedKernelFactory, DeviceMemory, + DeviceMemory>; using IncAndCmpKernel = - TypedKernel, DeviceMemory, int32_t>; + TypedKernelFactory, DeviceMemory, int32_t>; -using AddI32Ptrs3 = TypedKernel>; +using AddI32Ptrs3 = TypedKernelFactory>; static constexpr auto nested = CommandBuffer::Mode::kNested; // NOLINT static constexpr auto primary = CommandBuffer::Mode::kPrimary; // NOLINT diff --git a/third_party/xla/xla/stream_executor/gpu/gpu_kernel_test.cc b/third_party/xla/xla/stream_executor/gpu/gpu_kernel_test.cc index 591d417348776f..507fbfa477520f 100644 --- a/third_party/xla/xla/stream_executor/gpu/gpu_kernel_test.cc +++ b/third_party/xla/xla/stream_executor/gpu/gpu_kernel_test.cc @@ -26,6 +26,7 @@ limitations under the License. #include "xla/stream_executor/platform_manager.h" #include "xla/stream_executor/stream.h" #include "xla/stream_executor/stream_executor.h" +#include "xla/stream_executor/typed_kernel_factory.h" #include "tsl/lib/core/status_test_util.h" #include "tsl/platform/statusor.h" #include "tsl/platform/test.h" @@ -33,8 +34,9 @@ limitations under the License. namespace stream_executor::gpu { TEST(GpuKernelTest, Add) { - using AddI32Kernel = TypedKernel, DeviceMemory, - DeviceMemory>; + using AddI32Kernel = + TypedKernelFactory, DeviceMemory, + DeviceMemory>; auto name = absl::AsciiStrToUpper( xla::PlatformUtil::CanonicalPlatformName("gpu").value()); Platform* platform = PlatformManager::PlatformWithName(name).value(); diff --git a/third_party/xla/xla/stream_executor/kernel.h b/third_party/xla/xla/stream_executor/kernel.h index 67871cc2ba1876..f90ad3fc50fbf5 100644 --- a/third_party/xla/xla/stream_executor/kernel.h +++ b/third_party/xla/xla/stream_executor/kernel.h @@ -1,3 +1,4 @@ +#include "absl/base/attributes.h" /* Copyright 2015 The OpenXLA Authors. Licensed under the Apache License, Version 2.0 (the "License"); @@ -232,6 +233,7 @@ class Kernel { // a dedicated KernelFactory accessible via StreamExecutor. // Creates kernel on a given executor from a given kernel specification. 
+  ABSL_DEPRECATED("Use KernelFactory::Create instead.")
   static absl::StatusOr<std::unique_ptr<Kernel>> Create(
       StreamExecutorInterface *executor, const MultiKernelLoaderSpec &spec);

@@ -282,6 +284,8 @@ class Kernel {
 //===----------------------------------------------------------------------===//
 // Typed kernel
 //===----------------------------------------------------------------------===//
+template <typename... Params>
+class TypedKernelFactory;

 // Typed kernel is a typed smart-pointer-like wrapper around untyped Kernel.
 template <typename... Params>
 class TypedKernel {
@@ -290,6 +294,7 @@ class TypedKernel {
   static constexpr size_t kNumberOfParameters = sizeof...(Params);

   // Creates a typed kernel on a given executor from a kernel specification.
+  ABSL_DEPRECATED("Use TypedKernelFactory::Create instead.")
   static absl::StatusOr<TypedKernel> Create(StreamExecutorInterface *executor,
                                             const MultiKernelLoaderSpec &spec) {
     TF_ASSIGN_OR_RETURN(std::unique_ptr<Kernel> kernel,
@@ -302,18 +307,21 @@ class TypedKernel {
   // launch would have to match types of the arguments provided at creation
   // time. The canonical storage for both ptx and cubin_data should outlive the
   // lifetime of the kernel.
+  ABSL_DEPRECATED("Use TypedKernelFactory::Create instead.")
   static absl::StatusOr<TypedKernel> Create(
       StreamExecutorInterface *executor, absl::string_view kernel_name,
       absl::string_view ptx, absl::Span<const uint8_t> cubin_data);

   // Creates a kernel which can be launched with `stream.ThenLaunch(...)` from
   // an in-process symbol pointer.
+  ABSL_DEPRECATED("Use TypedKernelFactory::Create instead.")
   static absl::StatusOr<TypedKernel> Create(StreamExecutorInterface *executor,
                                             absl::string_view kernel_name,
                                             void *symbol);

   // Creates a kernel which can be launched with `stream.ThenLaunch(...)` from
   // an LLVM IR.
+  ABSL_DEPRECATED("Use TypedKernelFactory::Create instead.")
   static absl::StatusOr<TypedKernel> Create(StreamExecutorInterface *executor,
                                             absl::string_view ir,
                                             absl::string_view entrypoint,
@@ -331,6 +339,7 @@ class TypedKernel {
   operator bool() const { return static_cast<bool>(kernel_); }  // NOLINT

  private:
+  friend class TypedKernelFactory<Params...>;
   explicit TypedKernel(std::unique_ptr<Kernel> kernel)
       : kernel_(std::move(kernel)) {}

diff --git a/third_party/xla/xla/stream_executor/kernel_factory.h b/third_party/xla/xla/stream_executor/kernel_factory.h
new file mode 100644
index 00000000000000..2dd955efe5ed2c
--- /dev/null
+++ b/third_party/xla/xla/stream_executor/kernel_factory.h
@@ -0,0 +1,44 @@
+/* Copyright 2024 The OpenXLA Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef XLA_STREAM_EXECUTOR_KERNEL_FACTORY_H_
+#define XLA_STREAM_EXECUTOR_KERNEL_FACTORY_H_
+
+#include <memory>
+
+#include "absl/status/statusor.h"
+#include "xla/stream_executor/kernel.h"
+#include "xla/stream_executor/kernel_spec.h"
+#include "xla/stream_executor/stream_executor_interface.h"
+#include "tsl/platform/errors.h"
+#include "tsl/platform/statusor.h"
+
+namespace stream_executor {
+
+// Creates Kernels from kernel specifications.
+class KernelFactory {
+ public:
+  // Creates kernel on a given executor from a given kernel specification.
+  static inline absl::StatusOr<std::unique_ptr<Kernel>> Create(
+      StreamExecutorInterface *executor, const MultiKernelLoaderSpec &spec) {
+    TF_ASSIGN_OR_RETURN(auto kernel, executor->CreateKernel());
+    TF_RETURN_IF_ERROR(executor->GetKernel(spec, kernel.get()));
+    return kernel;
+  }
+};
+
+}  // namespace stream_executor
+
+#endif  // XLA_STREAM_EXECUTOR_KERNEL_FACTORY_H_
diff --git a/third_party/xla/xla/stream_executor/typed_kernel_factory.h b/third_party/xla/xla/stream_executor/typed_kernel_factory.h
new file mode 100644
index 00000000000000..ac908b13b08e0c
--- /dev/null
+++ b/third_party/xla/xla/stream_executor/typed_kernel_factory.h
@@ -0,0 +1,95 @@
+/* Copyright 2024 The OpenXLA Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef XLA_STREAM_EXECUTOR_TYPED_KERNEL_FACTORY_H_
+#define XLA_STREAM_EXECUTOR_TYPED_KERNEL_FACTORY_H_
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "absl/status/statusor.h"
+#include "absl/strings/string_view.h"
+#include "absl/types/span.h"
+#include "xla/stream_executor/kernel.h"
+#include "xla/stream_executor/kernel_factory.h"
+#include "xla/stream_executor/kernel_spec.h"
+#include "xla/stream_executor/stream_executor_interface.h"
+#include "tsl/platform/statusor.h"
+
+namespace stream_executor {
+
+// This class creates TypedKernel objects for stream executors based on the
+// specification.
+template <typename... Params>
+class TypedKernelFactory {
+ public:
+  // Creates a typed kernel on a given executor from a kernel specification.
+  static absl::StatusOr<TypedKernel<Params...>> Create(
+      StreamExecutorInterface *executor, const MultiKernelLoaderSpec &spec) {
+    TF_ASSIGN_OR_RETURN(std::unique_ptr<Kernel> kernel,
+                        KernelFactory::Create(executor, spec));
+    return TypedKernel<Params...>(std::move(kernel));
+  }
+
+  // Creates a kernel which can be launched with `stream.ThenLaunch(...)` from a
+  // PTX (and optional CUBIN), such that the types of the arguments provided for
+  // launch would have to match types of the arguments provided at creation
+  // time. The canonical storage for both ptx and cubin_data should outlive the
+  // lifetime of the kernel.
+  static absl::StatusOr<TypedKernel<Params...>> Create(
+      StreamExecutorInterface *executor, absl::string_view kernel_name,
+      absl::string_view ptx, absl::Span<const uint8_t> cubin_data) {
+    MultiKernelLoaderSpec loader_spec(
+        TypedKernel<Params...>::kNumberOfParameters);
+    loader_spec.AddCudaPtxInMemory(ptx, kernel_name);
+
+    if (!cubin_data.empty()) {
+      loader_spec.AddCudaCubinInMemory(cubin_data, kernel_name);
+    }
+
+    return TypedKernel<Params...>::Create(executor, loader_spec);
+  }
+
+  // Creates a kernel which can be launched with `stream.ThenLaunch(...)` from
+  // an in-process symbol pointer.
+ static absl::StatusOr> Create( + StreamExecutorInterface *executor, absl::string_view kernel_name, + void *symbol) { + MultiKernelLoaderSpec loader_spec( + TypedKernel::kNumberOfParameters); + loader_spec.AddInProcessSymbol(symbol, kernel_name); + + return TypedKernel::Create(executor, loader_spec); + } + + // Creates a kernel which can be launched with `stream.ThenLaunch(...)` from + // an LLVM IR. + static absl::StatusOr> Create( + StreamExecutorInterface *executor, absl::string_view ir, + absl::string_view entrypoint, absl::string_view kernel_name, + absl::Span options) { + MultiKernelLoaderSpec loader_spec( + TypedKernel::kNumberOfParameters); + loader_spec.AddLlvmHostKernel(ir, entrypoint, kernel_name, options); + + return TypedKernel::Create(executor, loader_spec); + } +}; + +} // namespace stream_executor + +#endif // XLA_STREAM_EXECUTOR_TYPED_KERNEL_FACTORY_H_ From 872ada51766d5e6185dbcea16ed568eb42ee7c8b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 May 2024 09:17:42 -0700 Subject: [PATCH 117/478] Update ops-related pbtxt files. PiperOrigin-RevId: 633607491 --- tensorflow/core/ops/ops.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 5019fe2a5b2fad..c8c55e08ca0d32 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -1,4 +1,4 @@ -go/debugproto +go/debugonly op { name: "Abort" attr { From 800071f14eab24e141392abb72671c861d4ab6b9 Mon Sep 17 00:00:00 2001 From: Kevin Gleason Date: Tue, 14 May 2024 10:03:21 -0700 Subject: [PATCH 118/478] Integrate StableHLO at openxla/stablehlo@797bee21 PiperOrigin-RevId: 633621411 --- .../tests/compose-uniform-quantized-type.mlir | 14 +- .../uniform-quantized-stablehlo-to-tfl.mlir | 34 +- .../common/attrs_and_constraints_test.cc | 4 +- .../passes/quantize/quantize_same_scale.mlir | 12 +- .../passes/quantize/quantize_weight_only.mlir | 12 +- third_party/stablehlo/temporary.patch | 23389 +--------------- third_party/stablehlo/workspace.bzl | 4 +- .../xla/third_party/stablehlo/temporary.patch | 23389 +--------------- .../xla/third_party/stablehlo/workspace.bzl | 4 +- .../xla/xla/mlir_hlo/mhlo/IR/hlo_base.td | 2 + .../xla/xla/mlir_hlo/mhlo/IR/hlo_ops.td | 48 +- .../mhlo/transforms/map_stablehlo_to_hlo_op.h | 1 - .../mhlo/hlo-legalize-to-stablehlo.mlir | 19 +- .../mhlo/mhlo-quant-legalize-to-int.mlir | 200 +- .../xla/mlir_hlo/tests/Dialect/mhlo/ops.mlir | 297 +- .../mhlo/stablehlo-legalize-to-hlo.mlir | 19 +- 16 files changed, 380 insertions(+), 47068 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/stablehlo/tests/compose-uniform-quantized-type.mlir b/tensorflow/compiler/mlir/lite/stablehlo/tests/compose-uniform-quantized-type.mlir index 20d56eea578d79..7e60dc85a487a6 100644 --- a/tensorflow/compiler/mlir/lite/stablehlo/tests/compose-uniform-quantized-type.mlir +++ b/tensorflow/compiler/mlir/lite/stablehlo/tests/compose-uniform-quantized-type.mlir @@ -431,8 +431,8 @@ module { %2 = stablehlo.constant dense<-128> : tensor<1x1x1xi8> // Input 1 zero point (z1). %3 = stablehlo.constant dense<-128> : tensor<1x1x1xi32> // Input 1 zero point (z1) (upcast & folded into i32). %4 = stablehlo.constant dense<4.000000e-01> : tensor<1x1x1xf32> // Input 2 inverse scale (1 / s2). - %5 = stablehlo.constant dense<-3> : tensor<1x1x1xi8> // Input 2 zero point (z2). - %6 = stablehlo.constant dense<-3> : tensor<1x1x1xi32> // Input 2 zero point (z2) (upcast & folded into i32). 
+ %5 = stablehlo.constant dense<0> : tensor<1x1x1xi8> // Input 2 zero point (z2). + %6 = stablehlo.constant dense<0> : tensor<1x1x1xi32> // Input 2 zero point (z2) (upcast & folded into i32). %7 = stablehlo.constant dense<5.000000e-01> : tensor<1x1x1xf32> // Output inverse scale (1 / s3). %8 = stablehlo.constant dense<-5> : tensor<1x1x1xi8> // Output zero point (z3). %9 = stablehlo.constant dense<1.250000e+01> : tensor<1x1x1xf32> // Merged scale (s1 * s2). @@ -454,8 +454,8 @@ module { return %23 : tensor<8x16x4xf32> } // CHECK: %[[UQ_0:.*]] = stablehlo.uniform_quantize %[[ARG_0]] : (tensor<8x16x16xf32>) -> tensor<8x16x16x!quant.uniform> -// CHECK: %[[UQ_1:.*]] = stablehlo.uniform_quantize %[[ARG_1]] : (tensor<8x16x4xf32>) -> tensor<8x16x4x!quant.uniform> -// CHECK: %[[DOT_GENERAL:.*]] = stablehlo.dot_general %[[UQ_0]], %[[UQ_1]], batching_dims = [0] x [0], contracting_dims = [2] x [1] : (tensor<8x16x16x!quant.uniform>, tensor<8x16x4x!quant.uniform>) -> tensor<8x16x4x!quant.uniform> +// CHECK: %[[UQ_1:.*]] = stablehlo.uniform_quantize %[[ARG_1]] : (tensor<8x16x4xf32>) -> tensor<8x16x4x!quant.uniform> +// CHECK: %[[DOT_GENERAL:.*]] = stablehlo.dot_general %[[UQ_0]], %[[UQ_1]], batching_dims = [0] x [0], contracting_dims = [2] x [1] : (tensor<8x16x16x!quant.uniform>, tensor<8x16x4x!quant.uniform>) -> tensor<8x16x4x!quant.uniform> // CHECK: %[[DQ_0:.*]] = stablehlo.uniform_dequantize %[[DOT_GENERAL]] : (tensor<8x16x4x!quant.uniform>) -> tensor<8x16x4xf32> // CHECK: return %[[DQ_0]] @@ -492,7 +492,7 @@ module { %1 = stablehlo.constant dense<2.000000e-01> : tensor<1x1x1xf32> // Input 1 inverse scale (1 / s1). %2 = stablehlo.constant dense<-128> : tensor<1x1x1xi8> // Input 1 zero point (z1). %3 = stablehlo.constant dense<4.000000e-01> : tensor<1x1x1xf32> // Input 2 inverse scale (1 / s2). - %4 = stablehlo.constant dense<-3> : tensor<1x1x1xi8> // Input 2 zero point (z2). + %4 = stablehlo.constant dense<0> : tensor<1x1x1xi8> // Input 2 zero point (z2). %5 = stablehlo.constant dense<5.000000e-01> : tensor<1x1x1xf32> // Output inverse scale (1 / s3). %6 = stablehlo.constant dense<-5> : tensor<1x1x1xi8> // Output zero point (z3). %7 = stablehlo.constant dense<1.250000e+01> : tensor<1x1x1xf32> // Merged scale (s1 * s2). 
@@ -516,8 +516,8 @@ module { return %23 : tensor<8x16x4xf32> } // CHECK: %[[UQ_0:.*]] = stablehlo.uniform_quantize %[[ARG_0]] : (tensor<8x16x16xf32>) -> tensor<8x16x16x!quant.uniform> -// CHECK: %[[UQ_1:.*]] = stablehlo.uniform_quantize %[[ARG_1]] : (tensor<8x16x4xf32>) -> tensor<8x16x4x!quant.uniform> -// CHECK: %[[DOT_GENERAL:.*]] = stablehlo.dot_general %[[UQ_0]], %[[UQ_1]], batching_dims = [0] x [0], contracting_dims = [2] x [1] : (tensor<8x16x16x!quant.uniform>, tensor<8x16x4x!quant.uniform>) -> tensor<8x16x4x!quant.uniform> +// CHECK: %[[UQ_1:.*]] = stablehlo.uniform_quantize %[[ARG_1]] : (tensor<8x16x4xf32>) -> tensor<8x16x4x!quant.uniform> +// CHECK: %[[DOT_GENERAL:.*]] = stablehlo.dot_general %[[UQ_0]], %[[UQ_1]], batching_dims = [0] x [0], contracting_dims = [2] x [1] : (tensor<8x16x16x!quant.uniform>, tensor<8x16x4x!quant.uniform>) -> tensor<8x16x4x!quant.uniform> // CHECK: %[[DQ_0:.*]] = stablehlo.uniform_dequantize %[[DOT_GENERAL]] : (tensor<8x16x4x!quant.uniform>) -> tensor<8x16x4xf32> // CHECK: return %[[DQ_0]] diff --git a/tensorflow/compiler/mlir/lite/stablehlo/tests/uniform-quantized-stablehlo-to-tfl.mlir b/tensorflow/compiler/mlir/lite/stablehlo/tests/uniform-quantized-stablehlo-to-tfl.mlir index dde72965ae65fc..64b14b85fc7c71 100644 --- a/tensorflow/compiler/mlir/lite/stablehlo/tests/uniform-quantized-stablehlo-to-tfl.mlir +++ b/tensorflow/compiler/mlir/lite/stablehlo/tests/uniform-quantized-stablehlo-to-tfl.mlir @@ -393,13 +393,13 @@ func.func @dot_general_upstream_srq_float_operands(%arg0: tensor<1x2x3x4xf32>, % // CHECK-LABEL: dot_general_upstream_srq_asym_weight func.func @dot_general_upstream_srq_asym_weight(%arg0: tensor<1x2x3x4x!quant.uniform>) -> tensor<1x2x3x5x!quant.uniform> { - %0 = stablehlo.constant() {value = dense<1> : tensor<1x2x4x5xi8>} : () -> tensor<1x2x4x5x!quant.uniform> - %1 = "stablehlo.dot_general"(%arg0, %0) {dot_dimension_numbers = #stablehlo.dot, precision_config = [#stablehlo, #stablehlo]} : (tensor<1x2x3x4x!quant.uniform>, tensor<1x2x4x5x!quant.uniform>) -> tensor<1x2x3x5x!quant.uniform> + %0 = stablehlo.constant() {value = dense<1> : tensor<1x2x4x5xi8>} : () -> tensor<1x2x4x5x!quant.uniform> + %1 = "stablehlo.dot_general"(%arg0, %0) {dot_dimension_numbers = #stablehlo.dot, precision_config = [#stablehlo, #stablehlo]} : (tensor<1x2x3x4x!quant.uniform>, tensor<1x2x4x5x!quant.uniform>) -> tensor<1x2x3x5x!quant.uniform> return %1 : tensor<1x2x3x5x!quant.uniform> } // CHECK-SAME: %[[ARG:.+]]: tensor<1x2x3x4x!quant.uniform> -// CHECK: %[[QCONST_0:.+]] = "tfl.pseudo_qconst"() <{qtype = tensor<1x2x4x5x!quant.uniform>, value = dense<1> : tensor<1x2x4x5xi8>}> : () -> tensor<1x2x4x5x!quant.uniform> -// CHECK: %[[BMM:.+]] = "tfl.batch_matmul"(%[[ARG]], %[[QCONST_0]]) <{adj_x = false, adj_y = false}> : (tensor<1x2x3x4x!quant.uniform>, tensor<1x2x4x5x!quant.uniform>) -> tensor<1x2x3x5x!quant.uniform> +// CHECK: %[[QCONST_0:.+]] = "tfl.pseudo_qconst"() <{qtype = tensor<1x2x4x5x!quant.uniform>, value = dense<1> : tensor<1x2x4x5xi8>}> : () -> tensor<1x2x4x5x!quant.uniform> +// CHECK: %[[BMM:.+]] = "tfl.batch_matmul"(%[[ARG]], %[[QCONST_0]]) <{adj_x = false, adj_y = false}> : (tensor<1x2x3x4x!quant.uniform>, tensor<1x2x4x5x!quant.uniform>) -> tensor<1x2x3x5x!quant.uniform> // ----- @@ -427,8 +427,8 @@ func.func @dot_general_upstream_srq_per_axis_quantized_filter(%arg0: tensor<1x3x // CHECK-LABEL: dot_general_upstream_srq_per_axis_quantized_filter_with_batch_dim func.func @dot_general_upstream_srq_per_axis_quantized_filter_with_batch_dim(%arg0: 
tensor<1x1x3x!quant.uniform>) -> tensor<1x1x2x!quant.uniform> { - %0 = stablehlo.constant() {value = dense<1> : tensor<1x3x2xi8>} : () -> tensor<1x3x2x!quant.uniform> - %1 = stablehlo.dot_general %arg0, %0, batching_dims = [0] x [0], contracting_dims = [2] x [1] : (tensor<1x1x3x!quant.uniform>, tensor<1x3x2x!quant.uniform>) -> tensor<1x1x2x!quant.uniform> + %0 = stablehlo.constant() {value = dense<1> : tensor<1x3x2xi8>} : () -> tensor<1x3x2x!quant.uniform> + %1 = stablehlo.dot_general %arg0, %0, batching_dims = [0] x [0], contracting_dims = [2] x [1] : (tensor<1x1x3x!quant.uniform>, tensor<1x3x2x!quant.uniform>) -> tensor<1x1x2x!quant.uniform> return %1 : tensor<1x1x2x!quant.uniform> } // Nothing changes. @@ -459,8 +459,8 @@ func.func @dot_general_upstream_srq_per_axis_quantized_filter_multibatch(%arg0: // CHECK-LABEL: dot_general_upstream_srq_per_axis_quantized_filter_with_multiple_contracting_dims func.func @dot_general_upstream_srq_per_axis_quantized_filter_with_multiple_contracting_dims(%arg0: tensor<1x2x3x!quant.uniform>) -> tensor<1x1x!quant.uniform> { - %0 = stablehlo.constant() {value = dense<1> : tensor<1x3x2xi8>} : () -> tensor<1x3x2x!quant.uniform> - %1 = stablehlo.dot_general %arg0, %0, contracting_dims = [1, 2] x [2, 1] : (tensor<1x2x3x!quant.uniform>, tensor<1x3x2x!quant.uniform>) -> tensor<1x1x!quant.uniform> + %0 = stablehlo.constant() {value = dense<1> : tensor<1x3x2xi8>} : () -> tensor<1x3x2x!quant.uniform> + %1 = stablehlo.dot_general %arg0, %0, contracting_dims = [1, 2] x [2, 1] : (tensor<1x2x3x!quant.uniform>, tensor<1x3x2x!quant.uniform>) -> tensor<1x1x!quant.uniform> return %1 : tensor<1x1x!quant.uniform> } // Nothing changes. @@ -557,9 +557,9 @@ func.func @dot_general_srq_constant_transpose_rhs(%arg0: tensor<1x3x!quant.unifo // (e.g. argument), the conversion to `tfl.fully_connected` doesn't happen. // CHECK-LABEL: dot_general_srq_arg_transpose_rhs -func.func @dot_general_srq_arg_transpose_rhs(%arg0: tensor<1x3x!quant.uniform>, %arg1: tensor<2x3x!quant.uniform>) -> tensor<1x2x!quant.uniform> { - %1 = stablehlo.transpose %arg1, dims = [1, 0] : (tensor<2x3x!quant.uniform>) -> tensor<3x2x!quant.uniform> - %2 = stablehlo.dot_general %arg0, %1, contracting_dims = [1] x [0] : (tensor<1x3x!quant.uniform>, tensor<3x2x!quant.uniform>) -> tensor<1x2x!quant.uniform> +func.func @dot_general_srq_arg_transpose_rhs(%arg0: tensor<1x3x!quant.uniform>, %arg1: tensor<2x3x!quant.uniform>) -> tensor<1x2x!quant.uniform> { + %1 = stablehlo.transpose %arg1, dims = [1, 0] : (tensor<2x3x!quant.uniform>) -> tensor<3x2x!quant.uniform> + %2 = stablehlo.dot_general %arg0, %1, contracting_dims = [1] x [0] : (tensor<1x3x!quant.uniform>, tensor<3x2x!quant.uniform>) -> tensor<1x2x!quant.uniform> %3 = stablehlo.uniform_quantize %2 : (tensor<1x2x!quant.uniform>) -> tensor<1x2x!quant.uniform> return %3 : tensor<1x2x!quant.uniform> } @@ -577,7 +577,7 @@ func.func @dot_general_srq_arg_transpose_rhs(%arg0: tensor<1x3x!quant.uniform qi8 requantization is // properly lowered to `tfl.batch_matmul`. 
-func.func @dot_general_srq_to_batch_matmul(%arg0: tensor<1x2x3x4x!quant.uniform>, %arg1: tensor<1x2x4x5x!quant.uniform>) -> tensor<1x2x3x5x!quant.uniform> { +func.func @dot_general_srq_to_batch_matmul(%arg0: tensor<1x2x3x4x!quant.uniform>, %arg1: tensor<1x2x4x5x!quant.uniform>) -> tensor<1x2x3x5x!quant.uniform> { %0 = "stablehlo.dot_general"(%arg0, %arg1) { dot_dimension_numbers = #stablehlo.dot< lhs_batching_dimensions = [0, 1], @@ -586,14 +586,14 @@ func.func @dot_general_srq_to_batch_matmul(%arg0: tensor<1x2x3x4x!quant.uniform< rhs_contracting_dimensions = [2] >, precision_config = [#stablehlo, #stablehlo] - } : (tensor<1x2x3x4x!quant.uniform>, tensor<1x2x4x5x!quant.uniform>) -> tensor<1x2x3x5x!quant.uniform> + } : (tensor<1x2x3x4x!quant.uniform>, tensor<1x2x4x5x!quant.uniform>) -> tensor<1x2x3x5x!quant.uniform> %1 = stablehlo.uniform_quantize %0 : (tensor<1x2x3x5x!quant.uniform>) -> tensor<1x2x3x5x!quant.uniform> return %1 : tensor<1x2x3x5x!quant.uniform> } // CHECK-LABEL: dot_general_srq_to_batch_matmul -// CHECK-SAME: (%[[ARG_0:.+]]: tensor<1x2x3x4x!quant.uniform>, %[[ARG_1:.+]]: tensor<1x2x4x5x!quant.uniform>) -> tensor<1x2x3x5x!quant.uniform> -// CHECK: %[[BMM:.+]] = "tfl.batch_matmul"(%[[ARG_0]], %[[ARG_1]]) <{adj_x = false, adj_y = false}> : (tensor<1x2x3x4x!quant.uniform>, tensor<1x2x4x5x!quant.uniform>) -> tensor<1x2x3x5x!quant.uniform> +// CHECK-SAME: (%[[ARG_0:.+]]: tensor<1x2x3x4x!quant.uniform>, %[[ARG_1:.+]]: tensor<1x2x4x5x!quant.uniform>) -> tensor<1x2x3x5x!quant.uniform> +// CHECK: %[[BMM:.+]] = "tfl.batch_matmul"(%[[ARG_0]], %[[ARG_1]]) <{adj_x = false, adj_y = false}> : (tensor<1x2x3x4x!quant.uniform>, tensor<1x2x4x5x!quant.uniform>) -> tensor<1x2x3x5x!quant.uniform> // CHECK-NOT: stablehlo.dot_general // CHECK-NOT: stablehlo.uniform_quantize // CHECK-NOT: tfl.fully_connected @@ -606,7 +606,7 @@ func.func @dot_general_srq_to_batch_matmul(%arg0: tensor<1x2x3x4x!quant.uniform< // not converted to `tfl.batch_matmul` when there are multiple use of the // intermediate result. 
-func.func @dot_general_srq_multiple_use_of_intermediate_result(%arg0: tensor<1x2x3x4x!quant.uniform>, %arg1: tensor<1x2x4x5x!quant.uniform>) -> tensor<1x2x3x5x!quant.uniform> { +func.func @dot_general_srq_multiple_use_of_intermediate_result(%arg0: tensor<1x2x3x4x!quant.uniform>, %arg1: tensor<1x2x4x5x!quant.uniform>) -> tensor<1x2x3x5x!quant.uniform> { %0 = "stablehlo.dot_general"(%arg0, %arg1) { dot_dimension_numbers = #stablehlo.dot< lhs_batching_dimensions = [0, 1], @@ -615,7 +615,7 @@ func.func @dot_general_srq_multiple_use_of_intermediate_result(%arg0: tensor<1x2 rhs_contracting_dimensions = [2] >, precision_config = [#stablehlo, #stablehlo] - } : (tensor<1x2x3x4x!quant.uniform>, tensor<1x2x4x5x!quant.uniform>) -> tensor<1x2x3x5x!quant.uniform> + } : (tensor<1x2x3x4x!quant.uniform>, tensor<1x2x4x5x!quant.uniform>) -> tensor<1x2x3x5x!quant.uniform> %1 = stablehlo.uniform_quantize %0 : (tensor<1x2x3x5x!quant.uniform>) -> tensor<1x2x3x5x!quant.uniform> %2 = stablehlo.uniform_quantize %0 : (tensor<1x2x3x5x!quant.uniform>) -> tensor<1x2x3x5x!quant.uniform> %3 = stablehlo.add %1, %2 : tensor<1x2x3x5x!quant.uniform> diff --git a/tensorflow/compiler/mlir/quantization/common/attrs_and_constraints_test.cc b/tensorflow/compiler/mlir/quantization/common/attrs_and_constraints_test.cc index ad0fc81157401f..720616309afe38 100644 --- a/tensorflow/compiler/mlir/quantization/common/attrs_and_constraints_test.cc +++ b/tensorflow/compiler/mlir/quantization/common/attrs_and_constraints_test.cc @@ -143,8 +143,8 @@ constexpr absl::string_view kModulePartitionedCall = R"mlir( constexpr absl::string_view kModuleHybridQuantized = R"mlir( module { - func.func @main(%arg0: tensor<1x2xf32>, %arg1: tensor<2x3x!quant.uniform> {tf_saved_model.index_path = ["input_tensor"]}) -> (tensor<1x3xf32>) { - %0 = stablehlo.dot_general %arg0, %arg1, contracting_dims = [1] x [0] : (tensor<1x2xf32>, tensor<2x3x!quant.uniform>) -> tensor<1x3xf32> + func.func @main(%arg0: tensor<1x2xf32>, %arg1: tensor<2x3x!quant.uniform> {tf_saved_model.index_path = ["input_tensor"]}) -> (tensor<1x3xf32>) { + %0 = stablehlo.dot_general %arg0, %arg1, contracting_dims = [1] x [0] : (tensor<1x2xf32>, tensor<2x3x!quant.uniform>) -> tensor<1x3xf32> return %0 : tensor<1x3xf32> } } diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/tests/passes/quantize/quantize_same_scale.mlir b/tensorflow/compiler/mlir/quantization/stablehlo/tests/passes/quantize/quantize_same_scale.mlir index 25aab3044a3496..7a905dfbe58a9e 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/tests/passes/quantize/quantize_same_scale.mlir +++ b/tensorflow/compiler/mlir/quantization/stablehlo/tests/passes/quantize/quantize_same_scale.mlir @@ -291,17 +291,17 @@ module attributes {tf_saved_model.semantics} { // CHECK-SAME: %[[ARG2:.*]]: tensor<2x3x2xi64> func.func private @composite_and_gather(%arg0: tensor<3x4x5xf32>, %arg1: tensor<3x5x2xf32>, %arg2: tensor<2x3x2xi64>) -> tensor<2x3x2x2xf32> { // CHECK: %[[Q1:.*]] = "quantfork.qcast"(%[[ARG0]]) {volatile} : (tensor<3x4x5xf32>) -> tensor<3x4x5x!quant.uniform> - // CHECK: %[[Q2:.*]] = "quantfork.qcast"(%[[ARG1]]) {volatile} : (tensor<3x5x2xf32>) -> tensor<3x5x2x!quant.uniform:f32, 6.000000e-03:13>> + // CHECK: %[[Q2:.*]] = "quantfork.qcast"(%[[ARG1]]) {volatile} : (tensor<3x5x2xf32>) -> tensor<3x5x2x!quant.uniform:f32, 6.000000e-03>> // CHECK: %[[CALL:.*]] = call @quantized_dot_general_fn_1(%[[Q1]], %[[Q2]]) - // CHECK-SAME: (tensor<3x4x5x!quant.uniform>, tensor<3x5x2x!quant.uniform:f32, 6.000000e-03:13>>) -> 
tensor<3x4x2x!quant.uniform> + // CHECK-SAME: (tensor<3x4x5x!quant.uniform>, tensor<3x5x2x!quant.uniform:f32, 6.000000e-03>>) -> tensor<3x4x2x!quant.uniform> // CHECK: %[[GATHER:.*]] = "stablehlo.gather"(%[[CALL]], %[[ARG2]]) // CHECK-SAME: (tensor<3x4x2x!quant.uniform>, tensor<2x3x2xi64>) -> tensor<2x3x2x2x!quant.uniform> // CHECK: %[[DQ:.*]] = "quantfork.dcast"(%[[GATHER]]) : (tensor<2x3x2x2x!quant.uniform>) -> tensor<2x3x2x2xf32> // CHECK: return %[[DQ]] %0 = "quantfork.qcast"(%arg0) {volatile} : (tensor<3x4x5xf32>) -> tensor<3x4x5x!quant.uniform> %1 = "quantfork.dcast"(%0) : (tensor<3x4x5x!quant.uniform>) -> tensor<3x4x5xf32> - %2 = "quantfork.qcast"(%arg1) {volatile} : (tensor<3x5x2xf32>) -> tensor<3x5x2x!quant.uniform:f32, 6.000000e-03:13>> - %3 = "quantfork.dcast"(%2) : (tensor<3x5x2x!quant.uniform:f32, 6.000000e-03:13>>) -> tensor<3x5x2xf32> + %2 = "quantfork.qcast"(%arg1) {volatile} : (tensor<3x5x2xf32>) -> tensor<3x5x2x!quant.uniform:f32, 6.000000e-03>> + %3 = "quantfork.dcast"(%2) : (tensor<3x5x2x!quant.uniform:f32, 6.000000e-03>>) -> tensor<3x5x2xf32> %4 = "tf.XlaCallModule"(%1, %3) {Sout = [#tf_type.shape<1x3>], _entry_function = @composite_dot_general_fn_1, _original_entry_function = "composite_dot_general_fn_1", _quantization_method = "static_range_ptq {}", _stablehlo_module_attrs = {}, _tfl_quant_trait = "fully_quantizable", device = "", dim_args_spec = [], disabled_checks = [], has_token_input_output = false, module = "", platforms = [], version = 5 : i64} : (tensor<3x4x5xf32>, tensor<3x5x2xf32>) -> tensor<3x4x2xf32> %5 = "quantfork.qcast"(%4) {volatile} : (tensor<3x4x2xf32>) -> tensor<3x4x2x!quant.uniform> %6 = "quantfork.dcast"(%5) : (tensor<3x4x2x!quant.uniform>) -> tensor<3x4x2xf32> @@ -321,10 +321,10 @@ module attributes {tf_saved_model.semantics} { // CHECK: quantized_dot_general_fn_1 // CHECK-SAME: %[[ARG2:.*]]: tensor<3x4x5x!quant.uniform> - // CHECK-SAME: %[[ARG3:.*]]: tensor<3x5x2x!quant.uniform:f32, 6.000000e-03:13>> + // CHECK-SAME: %[[ARG3:.*]]: tensor<3x5x2x!quant.uniform:f32, 6.000000e-03>> func.func private @composite_dot_general_fn_1(%arg0: tensor<3x4x5xf32>, %arg1: tensor<3x5x2xf32>) -> tensor<3x4x2xf32> attributes {_from_xla_call_module} { // CHECK: %[[DOT:.*]] = stablehlo.dot_general %[[ARG2]], %[[ARG3]] - // CHECK-SAME: (tensor<3x4x5x!quant.uniform>, tensor<3x5x2x!quant.uniform:f32, 6.000000e-03:13>>) -> tensor<3x4x2x!quant.uniform> + // CHECK-SAME: (tensor<3x4x5x!quant.uniform>, tensor<3x5x2x!quant.uniform:f32, 6.000000e-03>>) -> tensor<3x4x2x!quant.uniform> // CHECK: %[[Q3:.*]] = stablehlo.uniform_quantize %0 : (tensor<3x4x2x!quant.uniform>) -> tensor<3x4x2x!quant.uniform> // CHECK: return %[[Q3]] %0 = stablehlo.dot_general %arg0, %arg1, batching_dims = [0] x [0], contracting_dims = [2] x [1] : (tensor<3x4x5xf32>, tensor<3x5x2xf32>) -> tensor<3x4x2xf32> diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/tests/passes/quantize/quantize_weight_only.mlir b/tensorflow/compiler/mlir/quantization/stablehlo/tests/passes/quantize/quantize_weight_only.mlir index 78a48a2d9373e1..e152a90ce72c3a 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/tests/passes/quantize/quantize_weight_only.mlir +++ b/tensorflow/compiler/mlir/quantization/stablehlo/tests/passes/quantize/quantize_weight_only.mlir @@ -6,8 +6,8 @@ module attributes {tf_saved_model.semantics} { func.func private @quantize_dot_general_fn(%arg0: tensor<1x2xf32>) -> tensor<1x3xf32> attributes {tf._original_func_name = "main_0"} { %cst = stablehlo.constant dense<3.000000e-01> : 
tensor<2x3xf32> - %0 = "quantfork.qcast"(%cst) : (tensor<2x3xf32>) -> tensor<2x3x!quant.uniform> - %1 = "quantfork.dcast"(%0) : (tensor<2x3x!quant.uniform>) -> tensor<2x3xf32> + %0 = "quantfork.qcast"(%cst) : (tensor<2x3xf32>) -> tensor<2x3x!quant.uniform> + %1 = "quantfork.dcast"(%0) : (tensor<2x3x!quant.uniform>) -> tensor<2x3xf32> %2 = "tf.XlaCallModule"(%arg0, %1) <{Sout = [#tf_type.shape<1x3>], dim_args_spec = [], disabled_checks = [], has_token_input_output = false, module = "", platforms = [], version = 5 : i64}> {_entry_function = @composite_dot_general_fn, _original_entry_function = "composite_dot_general_fn", _quantization_method = "weight_only_ptq { }", _stablehlo_module_attrs = {}, _tfl_quant_trait = "fully_quantizable", device = ""} : (tensor<1x2xf32>, tensor<2x3xf32>) -> tensor<1x3xf32> return %2 : tensor<1x3xf32> } @@ -21,15 +21,15 @@ module attributes {tf_saved_model.semantics} { // CHECK-LABEL: quantize_dot_general_fn // CHECK-SAME: %[[ARG0:.+]]: tensor<1x2xf32> // CHECK: %[[CST:.+]] = stablehlo.constant dense<3.000000e-01> : tensor<2x3xf32> -// CHECK: %[[Q:.+]] = "quantfork.qcast"(%[[CST]]) : (tensor<2x3xf32>) -> tensor<2x3x!quant.uniform> +// CHECK: %[[Q:.+]] = "quantfork.qcast"(%[[CST]]) : (tensor<2x3xf32>) -> tensor<2x3x!quant.uniform> // CHECK: %[[CALL:.+]] = call @quantized_dot_general_fn(%[[ARG0]], %[[Q]]) -// CHECK-SAME: {_quantization_method = "weight_only_ptq { }"} : (tensor<1x2xf32>, tensor<2x3x!quant.uniform>) -> tensor<1x3xf32> +// CHECK-SAME: {_quantization_method = "weight_only_ptq { }"} : (tensor<1x2xf32>, tensor<2x3x!quant.uniform>) -> tensor<1x3xf32> // CHECK: return %[[CALL]] // CHECK: quantized_dot_general_fn -// CHECK-SAME: (%[[ARG1:.+]]: tensor<1x2xf32>, %[[ARG2:.+]]: tensor<2x3x!quant.uniform>) -> tensor<1x3xf32> +// CHECK-SAME: (%[[ARG1:.+]]: tensor<1x2xf32>, %[[ARG2:.+]]: tensor<2x3x!quant.uniform>) -> tensor<1x3xf32> // CHECK: %[[DOT:.+]] = stablehlo.dot_general %[[ARG1]], %[[ARG2]] -// CHECK-SAME: (tensor<1x2xf32>, tensor<2x3x!quant.uniform>) -> tensor<1x3xf32> +// CHECK-SAME: (tensor<1x2xf32>, tensor<2x3x!quant.uniform>) -> tensor<1x3xf32> // CHECK: return %[[DOT]] // ----- diff --git a/third_party/stablehlo/temporary.patch b/third_party/stablehlo/temporary.patch index f906a856ae69e1..abb329aec579e4 100755 --- a/third_party/stablehlo/temporary.patch +++ b/third_party/stablehlo/temporary.patch @@ -164,81 +164,6 @@ diff --ruN a/stablehlo/CMakeLists.txt b/stablehlo/CMakeLists.txt #------------------------------------------------------------------------------- # Directory setup -diff --ruN a/stablehlo/docs/spec.md b/stablehlo/docs/spec.md ---- stablehlo/docs/spec.md -+++ stablehlo/docs/spec.md -@@ -2532,10 +2532,10 @@ - rhs_batching_dimensions, lhs_contracting_dimensions, - rhs_contracting_dimensions, precision_config), lhs, rhs, type(result))`. - --For hybrid quantized types, performs `hybrid_dequantize_then_op( -- lambda lhs, rhs: dot_general(lhs, rhs, lhs_batching_dimensions, -- rhs_batching_dimensions, lhs_contracting_dimensions, -- rhs_contracting_dimensions, precision_config), lhs, rhs)`. -+This only specifies semantics for per-tensor quantization. Per-axis quantization -+is work in progress ([#1574](https://github.com/openxla/stablehlo/issues/1574)). -+Also, in the future we may consider adding support for hybrid quantization -+ ([#1575](https://github.com/openxla/stablehlo/issues/1575)). - - `precision_config` controls the tradeoff between speed and accuracy for - computations on accelerator backends. 
This can be one of the following (at the -@@ -2552,21 +2552,21 @@ - - #### Inputs - --| Label | Name | Type | Constraints | --|-------|------------------------------|--------------------------------------------------------------|------------------------------------------------| --| (I1) | `lhs` | tensor or per-tensor quantized tensor | (C5-C6), (C9-C10), (C12-C14), (C17-C18), (C20) | --| (I2) | `rhs` | tensor or quantized tensor | (C7-C10), (C12-C20) | --| (I3) | `lhs_batching_dimensions` | 1-dimensional tensor constant of type `si64` | (C1), (C3), (C5), (C9), (C12) | --| (I4) | `rhs_batching_dimensions` | 1-dimensional tensor constant of type `si64` | (C1), (C4), (C7), (C9) | --| (I5) | `lhs_contracting_dimensions` | 1-dimensional tensor constant of type `si64` | (C2), (C3), (C6), (C10) | --| (I6) | `rhs_contracting_dimensions` | 1-dimensional tensor constant of type `si64` | (C2), (C4), (C8), (C10), (C16) | --| (I7) | `precision_config` | variadic number of enums of `DEFAULT`, `HIGH`, and `HIGHEST` | (C11) | -+| Label | Name | Type | Constraints | -+|-------|------------------------------|--------------------------------------------------------------|--------------------------------| -+| (I1) | `lhs` | tensor or per-tensor quantized tensor | (C5-C6), (C9-C10), (C12-C16) | -+| (I2) | `rhs` | tensor or quantized tensor | (C7-C10), (C12), (C18-C19) | -+| (I3) | `lhs_batching_dimensions` | 1-dimensional tensor constant of type `si64` | (C1), (C3), (C5), (C9), (C12) | -+| (I4) | `rhs_batching_dimensions` | 1-dimensional tensor constant of type `si64` | (C1), (C4), (C7), (C9) | -+| (I5) | `lhs_contracting_dimensions` | 1-dimensional tensor constant of type `si64` | (C2), (C3), (C6), (C10) | -+| (I6) | `rhs_contracting_dimensions` | 1-dimensional tensor constant of type `si64` | (C2), (C4), (C8), (C10), (C19) | -+| (I7) | `precision_config` | variadic number of enums of `DEFAULT`, `HIGH`, and `HIGHEST` | (C11) | - - #### Outputs - - | Name | Type | Constraints | - |----------|----------------------------|----------------------------| --| `result` | tensor or quantized tensor | (C12), (C14), (C18-C20) | -+| `result` | tensor or quantized tensor | (C12), (C14), (C16), (C18) | - - #### Constraints - -@@ -2589,17 +2589,14 @@ - * If the operation uses non-quantized tensors: - * (C13) `element_type(lhs) = element_type(rhs)`. - * If the operation uses quantized tensors: -- * (C14) `is_quantized(lhs) = is_quantized(result) and is_quantized(rhs)`. -- * (C15) `zero_points(rhs) = 0`. -- * (C16) If `is_per_axis_quantized(rhs)`, then -+ * (C14) `is_quantized(lhs) and is_quantized(rhs) and is_quantized(result)`. -+ * (C15) `storage_type(lhs) = storage_type(rhs)`. -+ * (C16) `expressed_type(lhs) = expressed_type(rhs) = expressed_type(result)`. -+ * (C17) `zero_points(rhs) = 0`. -+ * (C18) If `is_per_tensor_quantized(rhs)`, then -+ `is_per_tensor_quantized(result)`. -+ * (C19) If `is_per_axis_quantized(rhs)`, then - `quantization_dimension(rhs)` not in `rhs_contracting_dimensions`. -- * If `is_quantized(lhs)`: -- * (C17) `storage_type(lhs) = storage_type(rhs)`. -- * (C18) `expressed_type(lhs) = expressed_type(rhs) = expressed_type(result)`. -- * (C19) If `is_per_tensor_quantized(rhs)`, then -- `is_per_tensor_quantized(result)`. -- * If `!is_quantized(lhs)`: -- * (C20) `element_type(lhs) = expressed_type(rhs) = element_type(result)`. 
- - #### Examples - diff --ruN a/stablehlo/stablehlo/CMakeLists.txt b/stablehlo/stablehlo/CMakeLists.txt --- stablehlo/stablehlo/CMakeLists.txt +++ stablehlo/stablehlo/CMakeLists.txt @@ -250,118 +175,10 @@ diff --ruN a/stablehlo/stablehlo/CMakeLists.txt b/stablehlo/stablehlo/CMakeLists add_subdirectory(integrations) add_subdirectory(reference) add_subdirectory(tests) -diff --ruN a/stablehlo/stablehlo/conversions/linalg/tests/convolution.mlir b/stablehlo/stablehlo/conversions/linalg/tests/convolution.mlir ---- stablehlo/stablehlo/conversions/linalg/tests/convolution.mlir -+++ stablehlo/stablehlo/conversions/linalg/tests/convolution.mlir -@@ -356,7 +356,7 @@ - } - // CHECK-DAG: %[[CST:.+]] = arith.constant 0.000000e+00 : f32 - // CHECK: %[[COLLAPSE:.+]] = tensor.collapse_shape %[[FILTER]] {{\[}}[0, 1, 2, 3]] : tensor<2x2x1x6xf32> into tensor<24xf32> --// CHECK: %[[EXPAND:.+]] = tensor.expand_shape %[[COLLAPSE]] {{\[}}[0, 1, 2, 3]] : tensor<24xf32> into tensor<2x2x2x3xf32> -+// CHECK: %[[EXPAND:.+]] = tensor.expand_shape %[[COLLAPSE]] {{\[}}[0, 1, 2, 3]] output_shape [2, 2, 2, 3] : tensor<24xf32> into tensor<2x2x2x3xf32> - // CHECK: %[[INIT:.+]] = tensor.empty() : tensor<2x3x4x2x3xf32> - // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST]] : f32) outs(%[[INIT]] : tensor<2x3x4x2x3xf32>) -> tensor<2x3x4x2x3xf32> - // CHECK: %[[OUT:.+]] = linalg.depthwise_conv_2d_nhwc_hwcm -diff --ruN a/stablehlo/stablehlo/conversions/linalg/tests/miscellaneous.mlir b/stablehlo/stablehlo/conversions/linalg/tests/miscellaneous.mlir ---- stablehlo/stablehlo/conversions/linalg/tests/miscellaneous.mlir -+++ stablehlo/stablehlo/conversions/linalg/tests/miscellaneous.mlir -@@ -865,7 +865,7 @@ - %0 = "stablehlo.reshape"(%arg0) : (tensor) -> tensor<1xi32> - func.return %0 : tensor<1xi32> - } --// CHECK: tensor.expand_shape %{{.*}} [] : tensor into tensor<1xi32> -+// CHECK: tensor.expand_shape %{{.*}} [] output_shape [1] : tensor into tensor<1xi32> - - // ----- - -@@ -876,7 +876,7 @@ - func.return %0 : tensor<1xui32> - } - // CHECK: %[[ARG_SIGNLESS:.*]] = builtin.unrealized_conversion_cast %[[ARG_UNSIGNED]] : tensor to tensor --// CHECK: %[[RET_SIGNLESS:.*]] = tensor.expand_shape %[[ARG_SIGNLESS]] [] : tensor into tensor<1xi32> -+// CHECK: %[[RET_SIGNLESS:.*]] = tensor.expand_shape %[[ARG_SIGNLESS]] [] output_shape [1] : tensor into tensor<1xi32> - // CHECK: %[[RET_UNSIGNED:.*]] = builtin.unrealized_conversion_cast %[[RET_SIGNLESS]] : tensor<1xi32> to tensor<1xui32> - // CHECK: return %[[RET_UNSIGNED]] : tensor<1xui32> - -@@ -978,7 +978,7 @@ - } - // CHECK: %[[FLATTEN:.*]] = tensor.collapse_shape %{{.*}} {{\[}}[0, 1]] : tensor into tensor - // CHECK: %[[CAST:.*]] = tensor.cast %[[FLATTEN]] : tensor to tensor<40xf32> --// CHECK: tensor.expand_shape %[[CAST]] {{\[}}[0, 1, 2]] : tensor<40xf32> into tensor<2x4x5xf32> -+// CHECK: tensor.expand_shape %[[CAST]] {{\[}}[0, 1, 2]] output_shape [2, 4, 5] : tensor<40xf32> into tensor<2x4x5xf32> - - // ----- - -@@ -988,7 +988,7 @@ - func.return %0 : tensor<1x3xi32> - } - // CHECK: %[[CAST:.*]] = tensor.cast %{{.*}} : tensor to tensor<3xi32> --// CHECK: tensor.expand_shape %[[CAST]] {{\[}}[0, 1]] : tensor<3xi32> into tensor<1x3xi32> -+// CHECK: tensor.expand_shape %[[CAST]] {{\[}}[0, 1]] output_shape [1, 3] : tensor<3xi32> into tensor<1x3xi32> - - // ----- - -diff --ruN a/stablehlo/stablehlo/conversions/linalg/tests/random.mlir b/stablehlo/stablehlo/conversions/linalg/tests/random.mlir ---- stablehlo/stablehlo/conversions/linalg/tests/random.mlir -+++ 
stablehlo/stablehlo/conversions/linalg/tests/random.mlir -@@ -480,8 +480,8 @@ - // CHECK-DAG: %[[VAL_101:.*]] = arith.xori %[[VAL_100]], %[[VAL_87]] : i32 - - // CHECK: linalg.yield %[[YIELDED_1:.*]], %[[YIELDED_2:.*]] : i64, i64 --// CHECK-DAG: %[[VAL_206:.*]] = tensor.expand_shape %[[VAL_207:.*]]#0 {{\[\[}}0, 1]] : tensor<4xi64> into tensor<4x1xi64> --// CHECK-DAG: %[[VAL_208:.*]] = tensor.expand_shape %[[VAL_207]]#1 {{\[\[}}0, 1]] : tensor<4xi64> into tensor<4x1xi64> -+// CHECK-DAG: %[[VAL_206:.*]] = tensor.expand_shape %[[VAL_207:.*]]#0 {{\[\[}}0, 1]] -+// CHECK-DAG: %[[VAL_208:.*]] = tensor.expand_shape %[[VAL_207]]#1 {{\[\[}}0, 1]] - // CHECK-DAG: %[[VAL_209:.*]] = tensor.empty() : tensor<4x2xi64> - // CHECK-DAG: %[[VAL_213:.*]] = tensor.insert %[[VAL_30]] into %[[VAL_0]]{{\[}}%[[VAL_19]]] : tensor<2xi64> - -@@ -575,10 +575,10 @@ - // CHECK: %[[COLLAPSE:.+]] = tensor.collapse_shape %[[CONCAT]] - - --// CHECK: %[[VAL_213:.*]] = tensor.expand_shape %[[COLLAPSE]] {{\[\[}}0, 1]] : tensor<80xi32> into tensor<80x1xi32> -+// CHECK: %[[VAL_213:.*]] = tensor.expand_shape %[[COLLAPSE]] {{\[\[}}0, 1]] - // CHECK: %[[VAL_214:.*]] = tensor.extract_slice %[[VAL_213]][0, 0] [77, 1] [1, 1] : tensor<80x1xi32> to tensor<77x1xi32> - // CHECK: %[[VAL_215:.*]] = tensor.collapse_shape %[[VAL_214]] {{\[\[}}0, 1]] : tensor<77x1xi32> into tensor<77xi32> --// CHECK: %[[VAL_216:.*]] = tensor.expand_shape %[[VAL_215]] {{\[\[}}0, 1]] : tensor<77xi32> into tensor<7x11xi32> -+// CHECK: %[[VAL_216:.*]] = tensor.expand_shape %[[VAL_215]] {{\[\[}}0, 1]] - // CHECK: %[[VAL_217:.*]] = tensor.insert %[[NEWSTATE]] into %[[ARG0]]{{\[}}%[[C1]]] : tensor<2xi64> - // CHECK: return %[[VAL_217]], %[[VAL_216]] : tensor<2xi64>, tensor<7x11xi32> - -@@ -616,10 +616,10 @@ - // CHECK-DAG: %[[COLLAPSE:.+]] = tensor.collapse_shape %[[CONCAT]] {{\[\[}}0, 1]] : tensor<8x2xi64> into tensor<16xi64> - - --// CHECK-DAG: %[[EXPANDED:.*]] = tensor.expand_shape %[[COLLAPSE]] {{\[\[}}0, 1]] : tensor<16xi64> into tensor<16x1xi64> -+// CHECK-DAG: %[[EXPANDED:.*]] = tensor.expand_shape %[[COLLAPSE]] {{\[\[}}0, 1]] - // CHECK-DAG: %[[SLICE:.*]] = tensor.extract_slice %[[EXPANDED]][0, 0] [15, 1] [1, 1] : tensor<16x1xi64> to tensor<15x1xi64> - // CHECK-DAG: %[[EXPAND_2:.*]] = tensor.collapse_shape %[[SLICE]] {{\[\[}}0, 1]] : tensor<15x1xi64> into tensor<15xi64> --// CHECK-DAG: %[[RESHAPE:.*]] = tensor.expand_shape %[[EXPAND_2]] {{\[\[}}0, 1]] : tensor<15xi64> into tensor<3x5xi64> -+// CHECK-DAG: %[[RESHAPE:.*]] = tensor.expand_shape %[[EXPAND_2]] {{\[\[}}0, 1]] - // CHECK-DAG: %[[INSERTED:.+]] = tensor.insert %[[NEWSTATE]] into %[[ARG0]][%[[C1]]] : tensor<2xi64> - // CHECK: return %[[INSERTED]], %[[RESHAPE]] - diff --ruN a/stablehlo/stablehlo/conversions/tosa/tests/binary.mlir b/stablehlo/stablehlo/conversions/tosa/tests/binary.mlir --- stablehlo/stablehlo/conversions/tosa/tests/binary.mlir +++ stablehlo/stablehlo/conversions/tosa/tests/binary.mlir -@@ -45,14 +45,14 @@ - - // CHECK-LABEL: @divide - func.func @divide(%arg0 : tensor<10xi32>, %arg1 : tensor<10xi32>) -> tensor<10xi32> { -- // CHECK: tosa.div -+ // CHECK: tosa.int_div - %0 = "stablehlo.divide"(%arg0, %arg1) : (tensor<10xi32>, tensor<10xi32>) -> tensor<10xi32> - return %0 : tensor<10xi32> - } - - // CHECK-LABEL: @divide_f32 - func.func @divide_f32(%arg0 : tensor<10xf32>, %arg1 : tensor<10xf32>) -> tensor<10xf32> { -- // tosa.div only supports i32, so this should not legalize. -+ // tosa.int_div only supports i32, so this should not legalize. 
- // CHECK: stablehlo.divide - %0 = "stablehlo.divide"(%arg0, %arg1) : (tensor<10xf32>, tensor<10xf32>) -> tensor<10xf32> - return %0 : tensor<10xf32> -@@ -123,7 +123,7 @@ +@@ -155,7 +155,7 @@ // CHECK-LABEL: @maximum_f64 func.func @maximum_f64(%arg0 : tensor<10xf64>, %arg1 : tensor<10xf64>) -> tensor<10xf64> { @@ -383,162 +200,6 @@ diff --ruN a/stablehlo/stablehlo/conversions/tosa/tests/nullary.mlir b/stablehlo %0 = stablehlo.constant dense<0.000000e+00> : tensor<10xf64> return %0 : tensor<10xf64> } -diff --ruN a/stablehlo/stablehlo/conversions/tosa/transforms/StablehloLegalizeToTosa.pdll b/stablehlo/stablehlo/conversions/tosa/transforms/StablehloLegalizeToTosa.pdll ---- stablehlo/stablehlo/conversions/tosa/transforms/StablehloLegalizeToTosa.pdll -+++ stablehlo/stablehlo/conversions/tosa/transforms/StablehloLegalizeToTosa.pdll -@@ -125,7 +125,7 @@ - Pattern => - replace op(input0 : Value<_: Tosa_Int32Tensor>, - input1 : Value<_: Tosa_Int32Tensor>) -- with op(input0, input1); -+ with op(input0, input1); - Pattern => - replace op(input0 : Value<_: Tosa_Tensor>, - input1 : Value<_: Tosa_Tensor>) -diff --ruN a/stablehlo/stablehlo/dialect/Base.cpp b/stablehlo/stablehlo/dialect/Base.cpp ---- stablehlo/stablehlo/dialect/Base.cpp -+++ stablehlo/stablehlo/dialect/Base.cpp -@@ -651,14 +651,14 @@ - - // quantized_type_c5 - auto maxPosFiniteNum = -- APFloat::getLargest(quantizedElementType.getExpressedType() -- .cast() -- .getFloatSemantics()) -+ APFloat::getLargest( -+ cast(quantizedElementType.getExpressedType()) -+ .getFloatSemantics()) - .convertToDouble(); - auto minPosFiniteNum = -- APFloat::getSmallest(quantizedElementType.getExpressedType() -- .cast() -- .getFloatSemantics()) -+ APFloat::getSmallest( -+ cast(quantizedElementType.getExpressedType()) -+ .getFloatSemantics()) - .convertToDouble(); - if (llvm::any_of(scales, [&](double scale) { - return scale < minPosFiniteNum || scale > maxPosFiniteNum; -diff --ruN a/stablehlo/stablehlo/dialect/TypeInference.cpp b/stablehlo/stablehlo/dialect/TypeInference.cpp ---- stablehlo/stablehlo/dialect/TypeInference.cpp -+++ stablehlo/stablehlo/dialect/TypeInference.cpp -@@ -171,7 +171,7 @@ - LogicalResult verifyConvolutionDotGeneralCommonQuantizationConstraints( - std::optional location, Type lhsElementType, Type rhsElementType, - Type resultElementType) { -- // convolution_c28 and dot_general_c14 -+ // convolution_c28 - if (!isa(rhsElementType) || - (isa(lhsElementType) != - isa(resultElementType))) { -@@ -184,19 +184,19 @@ - auto rhsQuantType = cast(rhsElementType); - if (auto lhsQuantType = dyn_cast(lhsElementType)) { - auto resultQuantType = cast(resultElementType); -- // convolution_c31 and dot_general_c17 -+ // convolution_c31 - if (lhsQuantType.getStorageType() != rhsQuantType.getStorageType()) { - return emitOptionalError( - location, "mismatched lhs and rhs quantization storage types"); - } -- // convolution_c32 and dot_general_c18 -+ // convolution_c32 - if (lhsQuantType.getExpressedType() != rhsQuantType.getExpressedType() || - lhsQuantType.getExpressedType() != resultQuantType.getExpressedType()) { - return emitOptionalError( - location, - "mismatched lhs, rhs and result quantization expressed types"); - } -- // convolution_c33 and dot_general_c19 -+ // convolution_c33 - if (isa(rhsQuantType) && - !isa(resultQuantType)) { - return emitOptionalError( -@@ -204,7 +204,7 @@ - } - } else { - Type rhsExpressedType = rhsQuantType.getExpressedType(); -- // convolution_c34 and dot_general_c20 -+ // convolution_c34 - if (lhsElementType != 
rhsExpressedType || - lhsElementType != resultElementType) { - return emitOptionalError(location, -@@ -3559,7 +3559,7 @@ - } - } - -- // convolution_c28, convolution_c31 - convolution_c34 -+ // convolution_c31 - convolution_c34 - return verifyConvolutionDotGeneralCommonQuantizationConstraints( - location, lhsElementType, rhsElementType, resultElementType); - } -@@ -3626,41 +3626,6 @@ - return success(); - } - --LogicalResult verifyDotGeneralOpQuantizationConstraints( -- std::optional location, Type lhsType, Type rhsType, -- Type resultType, ArrayRef rhsContractingDimensions) { -- Type lhsElementType = getElementTypeOrSelf(lhsType); -- Type rhsElementType = getElementTypeOrSelf(rhsType); -- Type resultElementType = getElementTypeOrSelf(resultType); -- -- // dot_general_c15 -- if (auto rhsPerTensorQuantType = -- dyn_cast(rhsElementType)) { -- if (rhsPerTensorQuantType.getZeroPoint() != 0) { -- return emitOptionalError(location, "Zero point of rhs should be 0"); -- } -- } else if (auto rhsPerAxisQuantType = -- dyn_cast(rhsElementType)) { -- if (llvm::any_of(rhsPerAxisQuantType.getZeroPoints(), -- [](int64_t zero_point) { return zero_point != 0; })) { -- return emitOptionalError(location, "Zero points of rhs should be 0"); -- } -- -- // dot_general_c16 -- if (llvm::is_contained(rhsContractingDimensions, -- rhsPerAxisQuantType.getQuantizedDimension())) { -- return emitOptionalError( -- location, -- "Quantization dimension of rhs should not be in the " -- "contracting dimension of rhs"); -- } -- } -- -- // dot_general_c14, dot_general_c17 - dot_general_c20 -- return verifyConvolutionDotGeneralCommonQuantizationConstraints( -- location, lhsElementType, rhsElementType, resultElementType); --} -- - LogicalResult verifyDotGeneralOp(std::optional location, Value lhs, - Value rhs, - ArrayRef lhsBatchingDimensions, -@@ -3683,13 +3648,6 @@ - return emitOptionalError( - location, "inferred shape '", dimSizesToString(inferredShape.getDims()), - "' ", "is incompatible with return type of operation ", resultType, ""); -- -- Type lhsType = lhs.getType(); -- Type rhsType = rhs.getType(); -- if (anyQuantized({lhsType, rhsType, resultType})) { -- return verifyDotGeneralOpQuantizationConstraints( -- location, lhsType, rhsType, resultType, rhsContractingDimensions); -- } - return success(); - } - -@@ -3861,8 +3819,8 @@ - if (SmallVector shape; operandType.hasStaticShape() && - matchInts(outputShape, shape).succeeded()) { - int64_t operandCount = operandType.getNumElements(); -- int64_t shapeCount = std::accumulate(shape.begin(), shape.end(), 1, -- std::multiplies()); -+ int64_t shapeCount = std::accumulate( -+ shape.begin(), shape.end(), int64_t{1}, std::multiplies()); - if (operandCount != shapeCount) { - return emitOptionalError(location, - "output_shape is incompatible with input type " diff --ruN a/stablehlo/stablehlo/experimental/BUILD.bazel b/stablehlo/stablehlo/experimental/BUILD.bazel --- stablehlo/stablehlo/experimental/BUILD.bazel +++ stablehlo/stablehlo/experimental/BUILD.bazel @@ -2928,23052 +2589,4 @@ diff --ruN a/stablehlo/stablehlo/experimental/transforms/StablehloRefineShapes.c +} // namespace experimental +} // namespace stablehlo +} // namespace mlir -diff --ruN a/stablehlo/stablehlo/integrations/python/tests/stablehlo.py b/stablehlo/stablehlo/integrations/python/tests/stablehlo.py ---- stablehlo/stablehlo/integrations/python/tests/stablehlo.py -+++ stablehlo/stablehlo/integrations/python/tests/stablehlo.py -@@ -241,18 +241,18 @@ - # Formatted as (tensor_type, np_value) - # Program runs arg + 
arg, which is used for expected value - tests = [ -- # No numpy types for f8 - skipping fp8 tests -- ("f16", np.asarray(1, np.float16)), -- ("f32", np.asarray(2, np.float32)), -- ("f64", np.asarray(3, np.double)), -- ("1xi8", np.asarray([4], np.int8)), -- ("1xi16", np.asarray([5], np.int16)), -- ("1xi32", np.asarray([-6], np.int32)), -- # Numpy's uint treated as int by DenseElementsAttr, skipping np.uint tests -- ("2x2xf16", np.asarray([1, 2, 3, 4], np.float16).reshape(2,2)), -- ("2x1x2xf16", np.asarray([1, 2, 3, 4], np.float16).reshape(2,1,2)), -- ("?x?xf16", np.asarray([1, 2, 3, 4], np.float16).reshape(2,2)), -- ("?x2xf16", np.asarray([1, 2, 3, 4], np.float16).reshape(2,2)), -+ # No numpy types for f8 - skipping fp8 tests -+ ("f16", np.asarray(1, np.float16)), -+ ("f32", np.asarray(2, np.float32)), -+ ("f64", np.asarray(3, np.double)), -+ ("1xi8", np.asarray([4], np.int8)), -+ ("1xi16", np.asarray([5], np.int16)), -+ ("1xi32", np.asarray([-6], np.int32)), -+ # Numpy's uint treated as int by DenseElementsAttr, skipping np.uint tests -+ ("2x2xf16", np.asarray([1, 2, 3, 4], np.float16).reshape(2, 2)), -+ ("2x1x2xf16", np.asarray([1, 2, 3, 4], np.float16).reshape(2, 1, 2)), -+ ("?x?xf16", np.asarray([1, 2, 3, 4], np.float16).reshape(2, 2)), -+ ("?x2xf16", np.asarray([1, 2, 3, 4], np.float16).reshape(2, 2)), - ] - for test in tests: - tensor_type, arg = test -diff --ruN a/stablehlo/stablehlo/tests/ops_stablehlo_quantized.mlir b/stablehlo/stablehlo/tests/ops_stablehlo_quantized.mlir ---- stablehlo/stablehlo/tests/ops_stablehlo_quantized.mlir -+++ stablehlo/stablehlo/tests/ops_stablehlo_quantized.mlir -@@ -1066,146 +1066,6 @@ - - // ----- - --func.func @dot_general_hybrid_quantized(%arg0: tensor<2x3x4xf32>, %arg1: tensor<2x3x5x!quant.uniform>) -> tensor<2x4x5xf32> { -- %0 = "stablehlo.dot_general"(%arg0, %arg1) { -- dot_dimension_numbers = #stablehlo.dot< -- lhs_batching_dimensions = [0], -- rhs_batching_dimensions = [0], -- lhs_contracting_dimensions = [1], -- rhs_contracting_dimensions = [1] -- > -- } : (tensor<2x3x4xf32>, tensor<2x3x5x!quant.uniform>) -> tensor<2x4x5xf32> -- func.return %0 : tensor<2x4x5xf32> --} -- --// ----- -- --func.func @dot_general_c14(%arg0: tensor<2x3x4xf32>, %arg1: tensor<2x3x5x!quant.uniform>) -> tensor<2x4x5x!quant.uniform> { -- // expected-error@+1 {{rhs should be quantized for quantized operations and is_quantized(lhs)=is_quantized(result) should hold}} -- %0 = "stablehlo.dot_general"(%arg0, %arg1) { -- dot_dimension_numbers = #stablehlo.dot< -- lhs_batching_dimensions = [0], -- rhs_batching_dimensions = [0], -- lhs_contracting_dimensions = [1], -- rhs_contracting_dimensions = [1] -- > -- } : (tensor<2x3x4xf32>, tensor<2x3x5x!quant.uniform>) -> tensor<2x4x5x!quant.uniform> -- func.return %0 : tensor<2x4x5x!quant.uniform> --} -- --// ----- -- --func.func @dot_general_c15_per_tensor(%arg0: tensor<2x3x4xf32>, %arg1: tensor<2x3x5x!quant.uniform>) -> tensor<2x4x5xf32> { -- // expected-error@+1 {{Zero point of rhs should be 0}} -- %0 = "stablehlo.dot_general"(%arg0, %arg1) { -- dot_dimension_numbers = #stablehlo.dot< -- lhs_batching_dimensions = [0], -- rhs_batching_dimensions = [0], -- lhs_contracting_dimensions = [1], -- rhs_contracting_dimensions = [1] -- > -- } : (tensor<2x3x4xf32>, tensor<2x3x5x!quant.uniform>) -> tensor<2x4x5xf32> -- func.return %0 : tensor<2x4x5xf32> --} -- --// ----- -- --func.func @dot_general_c15_per_axis( -- %arg0: tensor<2x3x4x!quant.uniform>, -- %arg1: tensor<2x3x5x!quant.uniform>) -> tensor<2x4x5x!quant.uniform> { -- // expected-error@+1 
{{Zero points of rhs should be 0}} -- %0 = "stablehlo.dot_general"(%arg0, %arg1) { -- dot_dimension_numbers = #stablehlo.dot< -- lhs_batching_dimensions = [0], -- rhs_batching_dimensions = [0], -- lhs_contracting_dimensions = [1], -- rhs_contracting_dimensions = [1] -- > -- } : (tensor<2x3x4x!quant.uniform>, -- tensor<2x3x5x!quant.uniform>) -> tensor<2x4x5x!quant.uniform> -- func.return %0 : tensor<2x4x5x!quant.uniform> --} -- --// ----- -- --func.func @dot_general_c16( -- %arg0: tensor<2x3x4x!quant.uniform>, -- %arg1: tensor<2x3x5x!quant.uniform>) -> tensor<2x4x5x!quant.uniform> { -- // expected-error@+1 {{Quantization dimension of rhs should not be in the contracting dimension of rhs}} -- %0 = "stablehlo.dot_general"(%arg0, %arg1) { -- dot_dimension_numbers = #stablehlo.dot< -- lhs_batching_dimensions = [1], -- rhs_batching_dimensions = [1], -- lhs_contracting_dimensions = [0], -- rhs_contracting_dimensions = [0] -- > -- } : (tensor<2x3x4x!quant.uniform>, -- tensor<2x3x5x!quant.uniform>) -> tensor<3x4x5x!quant.uniform> -- func.return %0 : tensor<3x4x5x!quant.uniform> --} -- --// ----- -- --func.func @dot_general_c17(%arg0: tensor<2x3x4x!quant.uniform>, %arg1: tensor<2x3x5x!quant.uniform>) -> tensor<2x4x5x!quant.uniform> { -- // expected-error@+1 {{mismatched lhs and rhs quantization storage types}} -- %0 = "stablehlo.dot_general"(%arg0, %arg1) { -- dot_dimension_numbers = #stablehlo.dot< -- lhs_batching_dimensions = [0], -- rhs_batching_dimensions = [0], -- lhs_contracting_dimensions = [1], -- rhs_contracting_dimensions = [1] -- > -- } : (tensor<2x3x4x!quant.uniform>, tensor<2x3x5x!quant.uniform>) -> tensor<2x4x5x!quant.uniform> -- func.return %0 : tensor<2x4x5x!quant.uniform> --} -- --// ----- -- --func.func @dot_general_c18(%arg0: tensor<2x3x4x!quant.uniform>, %arg1: tensor<2x3x5x!quant.uniform>) -> tensor<2x4x5x!quant.uniform> { -- // expected-error@+1 {{mismatched lhs, rhs and result quantization expressed types}} -- %0 = "stablehlo.dot_general"(%arg0, %arg1) { -- dot_dimension_numbers = #stablehlo.dot< -- lhs_batching_dimensions = [0], -- rhs_batching_dimensions = [0], -- lhs_contracting_dimensions = [1], -- rhs_contracting_dimensions = [1] -- > -- } : (tensor<2x3x4x!quant.uniform>, tensor<2x3x5x!quant.uniform>) -> tensor<2x4x5x!quant.uniform> -- func.return %0 : tensor<2x4x5x!quant.uniform> --} -- --// ----- -- --func.func @dot_general_c19(%arg0: tensor<2x3x4x!quant.uniform>, %arg1: tensor<2x3x5x!quant.uniform>) -> tensor<2x4x5x!quant.uniform> { -- // expected-error@+1 {{mismatched rhs and result quantization granularity}} -- %0 = "stablehlo.dot_general"(%arg0, %arg1) { -- dot_dimension_numbers = #stablehlo.dot< -- lhs_batching_dimensions = [0], -- rhs_batching_dimensions = [0], -- lhs_contracting_dimensions = [1], -- rhs_contracting_dimensions = [1] -- > -- } : (tensor<2x3x4x!quant.uniform>, tensor<2x3x5x!quant.uniform>) -> tensor<2x4x5x!quant.uniform> -- func.return %0 : tensor<2x4x5x!quant.uniform> --} -- --// ----- -- --func.func @dot_general_c20(%arg0: tensor<2x3x4xf32>, %arg1: tensor<2x3x5x!quant.uniform>) -> tensor<2x4x5xf32> { -- // expected-error@+1 {{mismatched rhs quantization expressed type and lhs and result element type}} -- %0 = "stablehlo.dot_general"(%arg0, %arg1) { -- dot_dimension_numbers = #stablehlo.dot< -- lhs_batching_dimensions = [0], -- rhs_batching_dimensions = [0], -- lhs_contracting_dimensions = [1], -- rhs_contracting_dimensions = [1] -- > -- } : (tensor<2x3x4xf32>, tensor<2x3x5x!quant.uniform>) -> tensor<2x4x5xf32> -- func.return %0 : tensor<2x4x5xf32> 
--} -- --// ----- -- - func.func @quantized_element_type_c8(%arg0: tensor<1x2x!quant.uniform:f32, 1.0:300>>) { - // expected-error-re@+1 {{operand #0 must be ranked tensor of {{.*}} 4/8/16/32-bit uniform quantized signed integer or 4/8/16/32-bit uniform quantized unsigned integer or 4/8/16/32-bit uniform quantized per axis signed integer or 4/8/16/32-bit uniform quantized per axis unsigned integer values, but got 'tensor<1x2x!quant.uniform>'}} - %0 = stablehlo.add %arg0, %arg0 : tensor<1x2x!quant.uniform:f32, 1.0:300>> -diff --ruN a/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_10_0.mlir b/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_10_0.mlir ---- stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_10_0.mlir -+++ stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_10_0.mlir -@@ -13,6 +13,7 @@ - // ============ ATTRIBUTES ============ - - // CHECK-LABEL: "attr_comparison_direction_eq" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_eq(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -22,6 +23,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_ne" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_ne(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -31,6 +33,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_ge" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_ge(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -40,6 +43,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_gt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_gt(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -49,6 +53,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_le" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_le(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -58,6 +63,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_lt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_lt(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -67,6 +73,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_notype" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_notype(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo -@@ -76,6 +83,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_float" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_float(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -86,6 +94,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_totalorder" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_totalorder(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { 
- comparison_direction = #stablehlo, -@@ -96,6 +105,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_signed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_signed(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -106,6 +116,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_unsigned" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_unsigned(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -118,6 +129,7 @@ - // ConvDimensionNumbers aka #stablehlo.conv is covered below. - - // CHECK-LABEL: "attr_custom_call_api_version_unspecified" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_unspecified(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -128,6 +140,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_original" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_original(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -138,6 +151,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_status_returning" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_status_returning(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -148,6 +162,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_status_returning_unified" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_status_returning_unified(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -160,6 +175,7 @@ - // DotDimensionNumbers aka #stablehlo.dot is covered below. - - // CHECK-LABEL: "attr_fft_type_fft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -170,6 +186,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_ifft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_ifft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -180,6 +197,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_rfft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_rfft(%arg0: tensor<16xf32>) -> tensor<9xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -190,6 +208,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_irfft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_irfft(%arg0: tensor<9xcomplex>) -> tensor<16xf32> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -202,6 +221,7 @@ - // GatherDimensionNumbers aka #stablehlo.gather is covered below. 
- - // CHECK-LABEL: "attr_precision_config_default" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_default(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -210,6 +230,7 @@ - } - - // CHECK-LABEL: "attr_precision_config_high" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_high(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -219,6 +240,7 @@ - } - - // CHECK-LABEL: "attr_precision_config_highest" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_highest(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -228,6 +250,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_default" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_default(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -237,6 +260,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_three_fry" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_three_fry(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -246,6 +270,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_philox" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_philox(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -255,6 +280,7 @@ - } - - // CHECK-LABEL: "attr_rng_distribution_uniform" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @attr_rng_distribution_uniform(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { - %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) { - // CHECK: rng_distribution = #vhlo -@@ -264,6 +290,7 @@ - } - - // CHECK-LABEL: "attr_rng_distribution_normal" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @attr_rng_distribution_normal(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { - %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) { - // CHECK: rng_distribution = #vhlo -@@ -275,6 +302,7 @@ - // ScatterDimensionNumbers aka #stablehlo.scatter is covered below. 
- - // CHECK-LABEL: "attr_transpose_no_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_no_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -287,6 +315,7 @@ - } - - // CHECK-LABEL: "attr_transpose_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -299,6 +328,7 @@ - } - - // CHECK-LABEL: "attr_transpose_adjoint" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_adjoint(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -313,10 +343,9 @@ - // TypeExtensionsAttr aka #stablehlo.type_extensions is covered below. - - // CHECK-LABEL: "attr_type_extensions_bounds" --func.func @attr_type_extensions_bounds( -- %arg0: tensor>) -- -> tensor> { -- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1>) -> () -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) -+func.func @attr_type_extensions_bounds(%arg0: tensor>) -> tensor> { -+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> () - func.return %arg0 : tensor> - } - -@@ -324,8 +353,9 @@ - // ============ DEFAULTS ============ - - // CHECK-LABEL: "default_all_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.all_gather_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{ - // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64> - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -339,8 +369,9 @@ - } - - // CHECK-LABEL: "default_all_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_reduce(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.all_reduce_v1"(%arg0) -+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]]) - // CHECK-SAME: <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -362,8 +393,9 @@ - } - - // CHECK-LABEL: "default_all_to_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> { -- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>, -@@ -380,8 +412,9 @@ - } - - // CHECK-LABEL: "default_cholesky" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> { -- // CHECK: "vhlo.cholesky_v1"(%arg0) <{ -+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{ - // CHECK-SAME: lower = #vhlo.bool_v1 - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1> - %0 = "stablehlo.cholesky"(%arg0) : (tensor<1x16x16xf32>) -> tensor<1x16x16xf32> -@@ -389,8 +422,9 @@ - } - - // CHECK-LABEL: "default_collective_permute" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: 
"vhlo.collective_permute_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -401,8 +435,9 @@ - } - - // CHECK-LABEL: "default_compare" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_compare(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: compare_type = #vhlo, - // CHECK-SAME: comparison_direction = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -413,8 +448,9 @@ - } - - // CHECK-LABEL: "default_convolution" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x6x6x16xf32> { -- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -442,8 +478,9 @@ - } - - // CHECK-LABEL: "default_custom_call" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_custom_call(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.custom_call_v1"(%arg0) <{ -+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{ - // CHECK-SAME: api_version = #vhlo, - // CHECK-SAME: backend_config = #vhlo.string_v1<"">, - // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">, -@@ -460,8 +497,9 @@ - } - - // CHECK-LABEL: "default_dot_general" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> { -- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, -@@ -480,8 +518,9 @@ - } - - // CHECK-LABEL: "default_dot" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.dot"(%arg0, %arg1) : (tensor<8x16xf32>, tensor<16x8xf32>) -> tensor<8x8xf32> -@@ -489,8 +528,9 @@ - } - - // CHECK-LABEL: "default_dynamic_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>>, - // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : 
tensor<0xi64>> -@@ -502,8 +542,9 @@ - } - - // CHECK-LABEL: "default_dynamic_conv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> { -- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -531,8 +572,9 @@ - } - - // CHECK-LABEL: "default_dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> { -- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -558,15 +600,16 @@ - // CHECK-SAME: sym_name = #vhlo.string_v1<"default_func">, - // CHECK-SAME: sym_visibility = #vhlo.string_v1<""> - // CHECK-SAME: }> ({ -- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1): -- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1): -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : () -> () - func.return %arg0 : tensor - } - - // CHECK-LABEL: "dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> { -- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -587,8 +630,9 @@ - } - - // CHECK-LABEL: "default_infeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.infeed_v1"(%arg0) <{ -+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{ - // CHECK-SAME: infeed_config = #vhlo.string_v1<"">, - // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[]> - // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) -@@ -597,8 +641,9 @@ - } - - // CHECK-LABEL: "default_outfeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: outfeed_config = #vhlo.string_v1<""> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.outfeed"(%arg0, %arg1) : (tensor, !stablehlo.token) -> !stablehlo.token -@@ -606,8 +651,9 @@ - } - - // CHECK-LABEL: "default_recv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.recv_v1"(%arg0) <{ -+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // 
CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -619,8 +665,9 @@ - } - - // CHECK-LABEL: "default_send" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -632,8 +679,9 @@ - } - - // CHECK-LABEL: "default_reduce_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64> -@@ -655,8 +703,9 @@ - } - - // CHECK-LABEL: "default_reduce_window" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x16x30x7xf32> { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, -@@ -678,8 +727,9 @@ - } - - // CHECK-LABEL: "default_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> { -- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, - // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>, -@@ -707,8 +757,9 @@ - } - - // CHECK-LABEL: "default_select_and_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<10x23x23x64xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> { -- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>> -@@ -736,8 +787,9 @@ - } - - // CHECK-LABEL: "default_sort" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.sort_v1"(%arg0) <{ -+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<-1 : i64> - // CHECK-SAME: is_stable = #vhlo.bool_v1 - // CHECK-SAME: }> ({ -@@ -756,29 +808,33 @@ - // ============ OPS ============ - - // CHECK-LABEL: "op_abs" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_abs(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : 
(!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_add" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_add(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_after_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_after_all(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.after_all_v1"(%arg0) : (!vhlo.token_v1) -> !vhlo.token_v1 -+ // CHECK: "vhlo.after_all_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.after_all"(%arg0) : (!stablehlo.token) -> !stablehlo.token - func.return %0 : !stablehlo.token - } - - // CHECK-LABEL: "op_all_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.all_gather_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{ - // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64> - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -794,8 +850,9 @@ - } - - // CHECK-LABEL: "op_all_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_reduce(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.all_reduce_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: use_global_device_ids = #vhlo.bool_v1 -@@ -817,8 +874,9 @@ - } - - // CHECK-LABEL: "op_all_to_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> { -- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>, -@@ -836,22 +894,25 @@ - } - - // CHECK-LABEL: "op_and" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_and(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_atan2" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_atan2(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.atan2_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.atan2_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.atan2"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_batch_norm_grad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_batch_norm_grad(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, 
%arg3: tensor<16xf32>, %arg4: tensor<16x16x16x16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { -- // CHECK: "vhlo.batch_norm_grad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ -+ // CHECK: "vhlo.batch_norm_grad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -@@ -863,8 +924,9 @@ - } - - // CHECK-LABEL: "op_batch_norm_inference" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_batch_norm_inference(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16xf32>) -> tensor<16x16x16x16xf32> { -- // CHECK: "vhlo.batch_norm_inference_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ -+ // CHECK: "vhlo.batch_norm_inference_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1> -@@ -876,8 +938,9 @@ - } - - // CHECK-LABEL: "op_batch_norm_training" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_batch_norm_training(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { -- // CHECK: "vhlo.batch_norm_training_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.batch_norm_training_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -@@ -889,15 +952,17 @@ - } - - // CHECK-LABEL: "op_bitcast_convert" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_bitcast_convert(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.bitcast_convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.bitcast_convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.bitcast_convert"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_broadcast_in_dim(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.broadcast_in_dim_v1"(%arg0) <{ -+ // CHECK: "vhlo.broadcast_in_dim_v1"(%[[ARG0]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1> - %0 = "stablehlo.broadcast_in_dim"(%arg0) { -@@ -907,8 +972,9 @@ - } - - // CHECK-LABEL: "op_broadcast" -+// CHECK-NEXT: 
(%[[ARG0:.*]]: {{.*}}) - func.func @op_broadcast(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.broadcast_v1"(%arg0) <{ -+ // CHECK: "vhlo.broadcast_v1"(%[[ARG0]]) <{ - // CHECK-SAME: broadcast_sizes = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1> - %0 = "stablehlo.broadcast"(%arg0) { -@@ -918,9 +984,10 @@ - } - - // CHECK-LABEL: "op_case" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_case(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.case_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.case"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -929,22 +996,25 @@ - } - - // CHECK-LABEL: "op_cbrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cbrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cbrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.cbrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cbrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_ceil" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_ceil(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.ceil_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.ceil_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.ceil"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_cholesky" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> { -- // CHECK: "vhlo.cholesky_v1"(%arg0) <{ -+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{ - // CHECK-SAME: lower = #vhlo.bool_v1 - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1> - %0 = "stablehlo.cholesky"(%arg0) { -@@ -954,22 +1024,25 @@ - } - - // CHECK-LABEL: "op_clamp" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_clamp(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.clamp_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.clamp_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.clamp"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_count_leading_zeros" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_count_leading_zeros(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.count_leading_zeros_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.count_leading_zeros_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.count_leading_zeros"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_collective_permute" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: "vhlo.collective_permute_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>> - // CHECK-SAME: }> : 
(!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1>
-@@ -981,8 +1054,9 @@
- }
-
- // CHECK-LABEL: "op_compare"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_compare(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: compare_type = #vhlo,
- // CHECK-SAME: comparison_direction = #vhlo
- // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-@@ -994,15 +1068,17 @@
- }
-
- // CHECK-LABEL: "op_complex"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_complex(%arg0: tensor, %arg1: tensor) -> tensor> {
-- // CHECK: "vhlo.complex_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1>
-+ // CHECK: "vhlo.complex_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1>
- %0 = "stablehlo.complex"(%arg0, %arg1) : (tensor, tensor) -> tensor>
- func.return %0 : tensor>
- }
-
- // CHECK-LABEL: "op_concatenate"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_concatenate(%arg0: tensor<8xf32>, %arg1: tensor<8xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.concatenate_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.concatenate_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<8x!vhlo.f32_v1>, !vhlo.tensor_v1<8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
- %0 = "stablehlo.concatenate"(%arg0, %arg1) {
-@@ -1012,6 +1088,7 @@
- }
-
- // CHECK-LABEL: "op_constant"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_constant(%arg0: tensor) -> tensor {
- // CHECK: "vhlo.constant_v1"() <{
- // CHECK-SAME: value = #vhlo.tensor_v1 : tensor>
-@@ -1023,15 +1100,17 @@
- }
-
- // CHECK-LABEL: "op_convert"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_convert(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.convert"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_convolution"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x7x7x16xf32> {
-- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>,
-@@ -1065,8 +1144,9 @@
- }
-
- // CHECK-LABEL: "op_cosine"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_cosine(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.cosine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.cosine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.cosine"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-@@ -1079,8 +1159,9 @@
- }
-
- // CHECK-LABEL: "op_cross_replica_sum"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_cross_replica_sum(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.cross-replica-sum_v1"(%arg0) <{
-+ // CHECK: "vhlo.cross-replica-sum_v1"(%[[ARG0]]) <{
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.cross-replica-sum"(%arg0) {
-@@ -1090,8 +1171,9 @@
- }
-
- // CHECK-LABEL: "op_custom_call"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_custom_call(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.custom_call_v1"(%arg0) <{
-+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{
- // CHECK-SAME: api_version = #vhlo,
- // CHECK-SAME: backend_config = #vhlo.string_v1<"\08\03\1A\02">,
- // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">,
-@@ -1122,15 +1204,17 @@
- }
-
- // CHECK-LABEL: "op_divide"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_divide(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.divide_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.divide_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.divide"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_dot_general"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> {
-- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>,
-@@ -1150,8 +1234,9 @@
- }
-
- // CHECK-LABEL: "op_dot"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
-- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1>
- %0 = "stablehlo.dot"(%arg0, %arg1) {
-@@ -1161,8 +1246,9 @@
- }
-
- // CHECK-LABEL: "op_dynamic_broadcast_in_dim"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor {
-- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>,
- // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>
-@@ -1176,8 +1262,9 @@
- }
-
- // CHECK-LABEL: "op_dynamic_conv"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> {
-- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>,
-@@ -1211,8 +1298,9 @@
- }
-
- // CHECK-LABEL: "op_dynamic_gather"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> {
-- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
- // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>,
- // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
-@@ -1232,8 +1320,9 @@
- }
-
- // CHECK-LABEL: "op_dynamic_iota"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_dynamic_iota(%arg0: tensor<1xindex>) -> tensor {
-- // CHECK: "vhlo.dynamic_iota_v1"(%arg0) <{
-+ // CHECK: "vhlo.dynamic_iota_v1"(%[[ARG0]]) <{
- // CHECK-SAME: iota_dimension = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.dynamic_iota"(%arg0) {
-@@ -1243,22 +1332,25 @@
- }
-
- // CHECK-LABEL: "op_dynamic_pad"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}})
- func.func @op_dynamic_pad(%arg0: tensor, %arg1: tensor, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>, %arg4: tensor<1xindex>) -> tensor {
-- // CHECK: "vhlo.dynamic_pad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.dynamic_pad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.dynamic_pad"(%arg0, %arg1, %arg2, %arg3, %arg4) : (tensor, tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_dynamic_reshape"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_dynamic_reshape(%arg0: tensor<16xf32>, %arg1: tensor<2xindex>) -> tensor {
-- // CHECK: "vhlo.dynamic_reshape_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.dynamic_reshape_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.dynamic_reshape"(%arg0, %arg1) : (tensor<16xf32>, tensor<2xindex>) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_dynamic_slice"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_dynamic_slice(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor<4xf32> {
-- // CHECK: "vhlo.dynamic_slice_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.dynamic_slice_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: slice_sizes = #vhlo.tensor_v1 : tensor<1xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<4x!vhlo.f32_v1>
- %0 = "stablehlo.dynamic_slice"(%arg0, %arg1) {
-@@ -1268,15 +1360,17 @@
- }
-
- // CHECK-LABEL: "op_dynamic_update_slice"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_dynamic_update_slice(%arg0: tensor<16xf32>, %arg1: tensor<4xf32>, %arg2: tensor) -> tensor<16xf32> {
-- // CHECK: "vhlo.dynamic_update_slice_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
-+ // CHECK: "vhlo.dynamic_update_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
- %0 = "stablehlo.dynamic_update_slice"(%arg0, %arg1, %arg2) : (tensor<16xf32>, tensor<4xf32>, tensor) -> tensor<16xf32>
- func.return %0 : tensor<16xf32>
- }
-
- // CHECK-LABEL: "op_einsum"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_einsum(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
-- // CHECK: "vhlo.einsum_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.einsum_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab,bc->ac">
- // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1>
- %0 = "stablehlo.einsum"(%arg0, %arg1) {
-@@ -1286,22 +1380,25 @@
- }
-
- // CHECK-LABEL: "op_exponential_minus_one"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_exponential_minus_one(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.exponential_minus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.exponential_minus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.exponential_minus_one"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_exponential"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_exponential(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.exponential_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.exponential_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.exponential"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_fft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> {
-- // CHECK: "vhlo.fft_v1"(%arg0) <{
-+ // CHECK: "vhlo.fft_v1"(%[[ARG0]]) <{
- // CHECK-SAME: fft_length = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: fft_type = #vhlo
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.complex_v1>) -> !vhlo.tensor_v1<16x!vhlo.complex_v1>
-@@ -1313,8 +1410,9 @@
- }
-
- // CHECK-LABEL: "op_floor"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_floor(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.floor_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.floor_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.floor"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-@@ -1327,16 +1425,17 @@
- // CHECK-SAME: sym_name = #vhlo.string_v1<"op_func">,
- // CHECK-SAME: sym_visibility = #vhlo.string_v1<"private">
- // CHECK-SAME: }> ({
-- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1):
-- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> ()
-+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1):
-+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }) : () -> ()
-
- func.return %arg0 : tensor
- }
-
- // CHECK-LABEL: "op_gather"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> {
-- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
- // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>,
- // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
-@@ -1358,8 +1457,9 @@
- }
-
- // CHECK-LABEL: "op_get_dimension_size"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_get_dimension_size(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.get_dimension_size_v1"(%arg0) <{
-+ // CHECK: "vhlo.get_dimension_size_v1"(%[[ARG0]]) <{
- // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.get_dimension_size"(%arg0) {
-@@ -1369,8 +1469,9 @@
- }
-
- // CHECK-LABEL: "op_get_tuple_element"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_get_tuple_element(%arg0: tuple, tensor>) -> tensor {
-- // CHECK: "vhlo.get_tuple_element_v1"(%arg0) <{
-+ // CHECK: "vhlo.get_tuple_element_v1"(%[[ARG0]]) <{
- // CHECK-SAME: index = #vhlo.integer_v1<0 : i32>
- // CHECK-SAME: }> : (!vhlo.tuple_v1, !vhlo.tensor_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.get_tuple_element"(%arg0) {
-@@ -1380,11 +1481,12 @@
- }
-
- // CHECK-LABEL: "op_if"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_if(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor {
-- // CHECK: "vhlo.if_v1"(%arg0) ({
-- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> ()
-+ // CHECK: "vhlo.if_v1"(%[[ARG0]]) ({
-+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }, {
-- // CHECK-NEXT: "vhlo.return_v1"(%arg2) : (!vhlo.tensor_v1) -> ()
-+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG2]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.if"(%arg0) ({
- "stablehlo.return"(%arg1) : (tensor) -> ()
-@@ -1395,15 +1497,17 @@
- }
-
- // CHECK-LABEL: "op_imag"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_imag(%arg0: tensor>) -> tensor {
-- // CHECK: "vhlo.imag_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.imag_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.imag"(%arg0) : (tensor>) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_infeed"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) {
-- // CHECK: "vhlo.infeed_v1"(%arg0) <{
-+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{
- // CHECK-SAME: infeed_config = #vhlo.string_v1<"foo">,
- // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[#vhlo.array_v1<[]>]>
- // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1)
-@@ -1426,36 +1530,41 @@
- }
-
- // CHECK-LABEL: "op_is_finite"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_is_finite(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.is_finite_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.is_finite_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.is_finite"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_log"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_log(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.log_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.log_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.log"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_log_plus_one"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_log_plus_one(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.log_plus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.log_plus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.log_plus_one"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_logistic"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_logistic(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.logistic_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.logistic_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.logistic"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_map"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_map(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.map_v1"(%arg0) <{
-+ // CHECK: "vhlo.map_v1"(%[[ARG0]]) <{
- // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>>
- // CHECK-SAME: }> ({
- // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1):
-@@ -1473,57 +1582,65 @@
- }
-
- // CHECK-LABEL: "op_maximum"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_maximum(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.maximum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.maximum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.maximum"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_minimum"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_minimum(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.minimum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.minimum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.minimum"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_multiply"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_multiply(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.multiply_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.multiply_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.multiply"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_negate"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_negate(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.negate_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.negate_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.negate"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_not"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_not(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.not_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.not_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.not"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_optimization_barrier"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_optimization_barrier(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.optimization_barrier_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.optimization_barrier_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.optimization_barrier"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_or"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_or(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.or_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.or_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.or"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_outfeed"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: outfeed_config = #vhlo.string_v1<"foo">
- // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1
- %0 = "stablehlo.outfeed"(%arg0, %arg1) {
-@@ -1533,8 +1650,9 @@
- }
-
- // CHECK-LABEL: "op_pad"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_pad(%arg0: tensor<8xf32>, %arg1: tensor) -> tensor<16xf32> {
-- // CHECK: "vhlo.pad_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.pad_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: edge_padding_high = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: edge_padding_low = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: interior_padding = #vhlo.tensor_v1 : tensor<1xi64>>
-@@ -1548,36 +1666,41 @@
- }
-
- // CHECK-LABEL: "op_popcnt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_popcnt(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.popcnt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.popcnt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.popcnt"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_power"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_power(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.power_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.power_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.power"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_real_dynamic_slice"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}})
- func.func @op_real_dynamic_slice(%arg0: tensor, %arg1: tensor<1xindex>, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>) -> tensor {
-- // CHECK: "vhlo.real_dynamic_slice_v1"(%arg0, %arg1, %arg2, %arg3) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.real_dynamic_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.real_dynamic_slice"(%arg0, %arg1, %arg2, %arg3) : (tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_real"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_real(%arg0: tensor>) -> tensor {
-- // CHECK: "vhlo.real_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.real_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.real"(%arg0) : (tensor>) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_recv"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) {
-- // CHECK: "vhlo.recv_v1"(%arg0) <{
-+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME: channel_type = #vhlo.integer_v1<3 : i64>,
- // CHECK-SAME: is_host_transfer = #vhlo.bool_v1
-@@ -1590,8 +1713,9 @@
- }
-
- // CHECK-LABEL: "op_reduce"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_reduce(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.reduce_v1"(%arg0, %arg1)
-+ // CHECK: "vhlo.reduce_v1"(%[[ARG0]], %[[ARG1]])
- // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1):
- // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> ()
- // CHECK: }) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-@@ -1606,8 +1730,9 @@
- }
-
- // CHECK-LABEL: "op_reduce_precision"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_reduce_precision(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.reduce_precision_v1"(%arg0) <{
-+ // CHECK: "vhlo.reduce_precision_v1"(%[[ARG0]]) <{
- // CHECK-SAME: exponent_bits = #vhlo.integer_v1<8 : i32>
- // CHECK-SAME: mantissa_bits = #vhlo.integer_v1<10 : i32>
- // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-@@ -1619,8 +1744,9 @@
- }
-
- // CHECK-LABEL: "op_reduce_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{
-+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
- // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64>
-@@ -1644,8 +1770,9 @@
- }
-
- // CHECK-LABEL: "op_reduce_window"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x9x16x7xf32> {
-- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>,
- // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>,
- // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>,
-@@ -1671,8 +1798,9 @@
- }
-
- // CHECK-LABEL: "op_remainder"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_remainder(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.remainder_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.remainder_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.remainder"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-@@ -1692,16 +1820,18 @@
- }
-
- // CHECK-LABEL: "op_reshape"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_reshape(%arg0: tensor<16xf32>) -> tensor<4x4xf32> {
-- // CHECK: "vhlo.reshape_v1"(%arg0) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1>
-+ // CHECK: "vhlo.reshape_v1"(%[[ARG0]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1>
- %0 = "stablehlo.reshape"(%arg0) : (tensor<16xf32>) -> tensor<4x4xf32>
- func.return %0 : tensor<4x4xf32>
- }
-
- // CHECK-LABEL: "op_return"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_return(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.case_v1"(%arg0) ({
-- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> ()
-+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({
-+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.case"(%arg0) ({
- "stablehlo.return"(%arg1) : (tensor) -> ()
-@@ -1710,8 +1840,9 @@
- }
-
- // CHECK-LABEL: "op_reverse"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_reverse(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.reverse_v1"(%arg0) <{
-+ // CHECK: "vhlo.reverse_v1"(%[[ARG0]]) <{
- // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
- %0 = "stablehlo.reverse"(%arg0) {
-@@ -1721,8 +1852,9 @@
- }
-
- // CHECK-LABEL: "op_rng_bit_generator"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_rng_bit_generator(%arg0: tensor) -> (tensor, tensor) {
-- // CHECK: "vhlo.rng_bit_generator_v1"(%arg0) <{
-+ // CHECK: "vhlo.rng_bit_generator_v1"(%[[ARG0]]) <{
- // CHECK-SAME: rng_algorithm = #vhlo
- // CHECK-SAME: }> : (!vhlo.tensor_v1) -> (!vhlo.tensor_v1, !vhlo.tensor_v1)
- %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
-@@ -1732,8 +1864,9 @@
- }
-
- // CHECK-LABEL: "op_rng"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_rng(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor {
-- // CHECK: "vhlo.rng_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.rng_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: rng_distribution = #vhlo
- // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<0x!vhlo.index_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) {
-@@ -1743,29 +1876,33 @@
- }
-
- // CHECK-LABEL: "op_round_nearest_afz"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_round_nearest_afz(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.round_nearest_afz_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.round_nearest_afz_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.round_nearest_afz"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_round_nearest_even"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_round_nearest_even(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.round_nearest_even_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.round_nearest_even_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.round_nearest_even"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_rsqrt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_rsqrt(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.rsqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.rsqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.rsqrt"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> {
-- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
- // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
-@@ -1795,8 +1932,9 @@
- }
-
- // CHECK-LABEL: "op_select_and_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<12x13x13x66xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> {
-- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>,
- // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>,
- // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>>
-@@ -1826,15 +1964,17 @@
- }
-
- // CHECK-LABEL: "op_select"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_select(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor {
-- // CHECK: "vhlo.select_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.select_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.select"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_send"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME: channel_type = #vhlo.integer_v1<2 : i64>,
- // CHECK-SAME: is_host_transfer = #vhlo.bool_v1
-@@ -1847,8 +1987,9 @@
- }
-
- // CHECK-LABEL: "op_set_dimension_size"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_set_dimension_size(%arg0: tensor, %arg1: tensor) -> tensor<16xf32> {
-- // CHECK: "vhlo.set_dimension_size_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.set_dimension_size_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
- %0 = "stablehlo.set_dimension_size"(%arg0, %arg1) {
-@@ -1858,43 +1999,49 @@
- }
-
- // CHECK-LABEL: "op_shift_left"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_shift_left(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.shift_left_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.shift_left_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.shift_left"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_shift_right_arithmetic"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_shift_right_arithmetic(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.shift_right_arithmetic_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.shift_right_arithmetic_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.shift_right_arithmetic"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_shift_right_logical"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_shift_right_logical(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.shift_right_logical_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.shift_right_logical_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.shift_right_logical"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_sign"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sign(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.sign_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.sign_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.sign"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_sine"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sine(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.sine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.sine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.sine"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_slice"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_slice(%arg0: tensor<16xf32>) -> tensor<4xf32> {
-- // CHECK: "vhlo.slice_v1"(%arg0) <{
-+ // CHECK: "vhlo.slice_v1"(%[[ARG0]]) <{
- // CHECK-SAME: limit_indices = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: start_indices = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: strides = #vhlo.tensor_v1 : tensor<1xi64>>
-@@ -1908,8 +2055,9 @@
- }
-
- // CHECK-LABEL: "op_sort"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.sort_v1"(%arg0) <{
-+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{
- // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: is_stable = #vhlo.bool_v1
- // CHECK-SAME: }> ({
-@@ -1929,29 +2077,33 @@
- }
-
- // CHECK-LABEL: "op_sqrt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sqrt(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.sqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.sqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.sqrt"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_subtract"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_subtract(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.subtract_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.subtract_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.subtract"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_tanh"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_tanh(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.tanh_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.tanh_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.tanh"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_torch_index_select"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_torch_index_select(%arg0: tensor<5x1x5xf32>, %arg1: tensor<2xi32>) -> tensor<2x1x5xf32> {
-- // CHECK: "vhlo.torch_index_select_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.torch_index_select_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: batch_dims = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: dim = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<5x1x5x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.i32_v1>) -> !vhlo.tensor_v1<2x1x5x!vhlo.f32_v1>
-@@ -1963,8 +2115,9 @@
- }
-
- // CHECK-LABEL: "op_trace"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_trace(%arg0: tensor) {
-- // CHECK: "vhlo.trace_v1"(%arg0) <{
-+ // CHECK: "vhlo.trace_v1"(%[[ARG0]]) <{
- // CHECK-SAME: tag = #vhlo.string_v1<"foo">
- // CHECK-SAME: }> : (!vhlo.tensor_v1) -> ()
- "stablehlo.trace"(%arg0) {
-@@ -1974,8 +2127,9 @@
- }
-
- // CHECK-LABEL: "op_transpose"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_transpose(%arg0: tensor<16x8xf32>) -> tensor<8x16xf32> {
-- // CHECK: "vhlo.transpose_v1"(%arg0) <{
-+ // CHECK: "vhlo.transpose_v1"(%[[ARG0]]) <{
- // CHECK-SAME: permutation = #vhlo.tensor_v1 : tensor<2xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x16x!vhlo.f32_v1>
- %0 = "stablehlo.transpose"(%arg0) {
-@@ -1985,8 +2139,9 @@
- }
-
- // CHECK-LABEL: "op_triangular_solve"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_triangular_solve(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
-- // CHECK: "vhlo.triangular_solve_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.triangular_solve_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: left_side = #vhlo.bool_v1,
- // CHECK-SAME: lower = #vhlo.bool_v1,
- // CHECK-SAME: transpose_a = #vhlo,
-@@ -2002,15 +2157,17 @@
- }
-
- // CHECK-LABEL: "op_tuple"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_tuple(%arg0: tensor) -> tuple> {
-- // CHECK: "vhlo.tuple_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1>
-+ // CHECK: "vhlo.tuple_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1>
- %0 = "stablehlo.tuple"(%arg0) : (tensor) -> tuple>
- func.return %0 : tuple>
- }
-
- // CHECK-LABEL: "op_unary_einsum"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_unary_einsum(%arg0: tensor<8x16xf32>) -> tensor<8xf32> {
-- // CHECK: "vhlo.unary_einsum_v1"(%arg0) <{
-+ // CHECK: "vhlo.unary_einsum_v1"(%[[ARG0]]) <{
- // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab->a">
- // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x!vhlo.f32_v1>
- %0 = "stablehlo.unary_einsum"(%arg0) {
-@@ -2020,22 +2177,25 @@
- }
-
- // CHECK-LABEL: "op_uniform_dequantize"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_uniform_dequantize(%arg0: tensor>) -> tensor {
-- // CHECK: "vhlo.uniform_dequantize_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.uniform_dequantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.uniform_dequantize"(%arg0) : (tensor>) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_uniform_quantize"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_uniform_quantize(%arg0: tensor) -> tensor> {
-- // CHECK: "vhlo.uniform_quantize_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1>
-+ // CHECK: "vhlo.uniform_quantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1>
- %0 = "stablehlo.uniform_quantize"(%arg0) : (tensor) -> tensor>
- func.return %0 : tensor>
- }
-
- // CHECK-LABEL: "op_while"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_while(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.while_v1"(%arg0) ({
-+ // CHECK: "vhlo.while_v1"(%[[ARG0]]) ({
- // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1):
- // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }, {
-@@ -2053,8 +2213,9 @@
- }
-
- // CHECK-LABEL: "op_xor"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_xor(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.xor_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.xor_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.xor"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-@@ -2062,183 +2223,209 @@
- // ============ TYPES ============
-
- // CHECK-LABEL: "type_i1"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i1(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i4"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i4(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i8"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i8(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i16(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i32(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i64(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_ui4"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui4(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_ui8"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui8(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_ui16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui16(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_ui32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui32(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_ui64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui64(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f8E4M3FN"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E4M3FN(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f8E5M2"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E5M2(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f8E4M3FNUZ"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E4M3FNUZ(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f8E5M2FNUZ"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E5M2FNUZ(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_bf16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_bf16(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f16(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f32(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f64(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_complex_f32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_complex_f32(%arg0: tensor>, %arg1: tensor>) -> tensor> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor>
- func.return %0 : tensor>
- }
-
- // CHECK-LABEL: "type_complex_f64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_complex_f64(%arg0: tensor>, %arg1: tensor>) -> tensor> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor>
- func.return %0 : tensor>
- }
-
- // CHECK-LABEL: "type_dynamism_ranked"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_dynamism_ranked(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_quantization"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_quantization(%arg0: tensor>, %arg1: tensor>) -> tensor> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor>
- func.return %0 : tensor>
- }
-
- // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>>
- // CHECK-LABEL: "type_token_callee"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_token_callee(%arg0: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.token_v1) -> ()
-+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> ()
- return %arg0 : !stablehlo.token
- }
-
- // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>>
- // CHECK-LABEL: "type_token_caller"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_token_caller(%arg0: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.call_v1"(%arg0) <{callee = #vhlo.string_v1<"type_token_callee">}
-+ // CHECK: "vhlo.call_v1"(%[[ARG0]]) <{callee = #vhlo.string_v1<"type_token_callee">}
- // CHECK-SAME: (!vhlo.token_v1) -> !vhlo.token_v1
- %0 = func.call @type_token_callee(%arg0) : (!stablehlo.token) -> !stablehlo.token
- return %0 : !stablehlo.token
- }
-
- // CHECK-LABEL: "type_tuple"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_tuple(%arg0: tuple>) -> tuple {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo"
-diff --ruN a/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_11_0.mlir b/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_11_0.mlir
---- stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_11_0.mlir
-+++ stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_11_0.mlir
-@@ -13,6 +13,7 @@
- // ============ ATTRIBUTES ============
-
- // CHECK-LABEL: "attr_comparison_direction_eq"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_eq(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -22,6 +23,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_ne"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_ne(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -31,6 +33,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_ge"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_ge(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -40,6 +43,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_gt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_gt(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -49,6 +53,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_le"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_le(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -58,6 +63,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_lt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_lt(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -67,6 +73,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_notype"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_notype(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo
-@@ -76,6 +83,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_float"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_float(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo,
-@@ -86,6 +94,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_totalorder"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_totalorder(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo,
-@@ -96,6 +105,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_signed"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_signed(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo,
-@@ -106,6 +116,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_unsigned"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_unsigned(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo,
-@@ -118,6 +129,7 @@
- // ConvDimensionNumbers aka #stablehlo.conv is covered below.
-
- // CHECK-LABEL: "attr_custom_call_api_version_unspecified"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_custom_call_api_version_unspecified(%arg0: tensor) -> tensor {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo",
-@@ -128,6 +140,7 @@
- }
-
- // CHECK-LABEL: "attr_custom_call_api_version_original"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_custom_call_api_version_original(%arg0: tensor) -> tensor {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo",
-@@ -138,6 +151,7 @@
- }
-
- // CHECK-LABEL: "attr_custom_call_api_version_status_returning"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_custom_call_api_version_status_returning(%arg0: tensor) -> tensor {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo",
-@@ -148,6 +162,7 @@
- }
-
- // CHECK-LABEL: "attr_custom_call_api_version_status_returning_unified"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_custom_call_api_version_status_returning_unified(%arg0: tensor) -> tensor {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo",
-@@ -160,6 +175,7 @@
- // DotDimensionNumbers aka #stablehlo.dot is covered below.
-
- // CHECK-LABEL: "attr_fft_type_fft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -170,6 +186,7 @@
- }
-
- // CHECK-LABEL: "attr_fft_type_ifft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_ifft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -180,6 +197,7 @@
- }
-
- // CHECK-LABEL: "attr_fft_type_rfft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_rfft(%arg0: tensor<16xf32>) -> tensor<9xcomplex> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -190,6 +208,7 @@
- }
-
- // CHECK-LABEL: "attr_fft_type_irfft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_irfft(%arg0: tensor<9xcomplex>) -> tensor<16xf32> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -202,6 +221,7 @@
- // GatherDimensionNumbers aka #stablehlo.gather is covered below.
-
- // CHECK-LABEL: "attr_precision_config_default"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_precision_config_default(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
- %0 = "stablehlo.dot"(%arg0, %arg1) {
- // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
-@@ -210,6 +230,7 @@
- }
-
- // CHECK-LABEL: "attr_precision_config_high"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_precision_config_high(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
- %0 = "stablehlo.dot"(%arg0, %arg1) {
- // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
-@@ -219,6 +240,7 @@
- }
-
- // CHECK-LABEL: "attr_precision_config_highest"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_precision_config_highest(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
- %0 = "stablehlo.dot"(%arg0, %arg1) {
- // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
-@@ -228,6 +250,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_algorithm_default"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_rng_algorithm_default(%arg0: tensor) -> (tensor, tensor) {
- %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
- // CHECK: rng_algorithm = #vhlo
-@@ -237,6 +260,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_algorithm_three_fry"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_rng_algorithm_three_fry(%arg0: tensor) -> (tensor, tensor) {
- %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
- // CHECK: rng_algorithm = #vhlo
-@@ -246,6 +270,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_algorithm_philox"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_rng_algorithm_philox(%arg0: tensor) -> (tensor, tensor) {
- %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
- // CHECK: rng_algorithm = #vhlo
-@@ -255,6 +280,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_distribution_uniform"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @attr_rng_distribution_uniform(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor {
- %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) {
- // CHECK: rng_distribution = #vhlo
-@@ -264,6 +290,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_distribution_normal"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @attr_rng_distribution_normal(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor {
- %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) {
- // CHECK: rng_distribution = #vhlo
-@@ -275,6 +302,7 @@
- // ScatterDimensionNumbers aka #stablehlo.scatter is covered below.
-
- // CHECK-LABEL: "attr_transpose_no_transpose"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_transpose_no_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
- %0 = "stablehlo.triangular_solve"(%arg0, %arg1) {
- left_side = true,
-@@ -287,6 +315,7 @@
- }
-
- // CHECK-LABEL: "attr_transpose_transpose"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_transpose_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
- %0 = "stablehlo.triangular_solve"(%arg0, %arg1) {
- left_side = true,
-@@ -299,6 +328,7 @@
- }
-
- // CHECK-LABEL: "attr_transpose_adjoint"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_transpose_adjoint(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
- %0 = "stablehlo.triangular_solve"(%arg0, %arg1) {
- left_side = true,
-@@ -313,10 +343,9 @@
- // TypeExtensionsAttr aka #stablehlo.type_extensions is covered below.
-
- // CHECK-LABEL: "attr_type_extensions_bounds"
--func.func @attr_type_extensions_bounds(
-- %arg0: tensor>)
-- -> tensor> {
-- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1>) -> ()
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
-+func.func @attr_type_extensions_bounds(%arg0: tensor>) -> tensor> {
-+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> ()
- func.return %arg0 : tensor>
- }
-
-@@ -324,8 +353,9 @@
- // ============ DEFAULTS ============
-
- // CHECK-LABEL: "default_all_gather"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> {
-- // CHECK: "vhlo.all_gather_v1"(%arg0) <{
-+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{
- // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64>
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
-@@ -339,8 +369,9 @@
- }
-
- // CHECK-LABEL: "default_all_reduce"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_all_reduce(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.all_reduce_v1"(%arg0)
-+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]])
- // CHECK-SAME: <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
-@@ -362,8 +393,9 @@
- }
-
- // CHECK-LABEL: "default_all_to_all"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> {
-- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{
-+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>,
-@@ -380,8 +412,9 @@
- }
-
- // CHECK-LABEL: "default_cholesky"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> {
-- // CHECK: "vhlo.cholesky_v1"(%arg0) <{
-+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{
- // CHECK-SAME: lower = #vhlo.bool_v1
- // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>
- %0 = "stablehlo.cholesky"(%arg0) : (tensor<1x16x16xf32>) -> tensor<1x16x16xf32>
-@@ -389,8 +422,9 @@
- }
-
- // CHECK-LABEL: "default_collective_permute"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> {
-- // CHECK: "vhlo.collective_permute_v1"(%arg0) <{
-+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1>
-@@ -401,8 +435,9 @@
- }
-
- // CHECK-LABEL: "default_compare"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_compare(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: compare_type = #vhlo,
- // CHECK-SAME: comparison_direction = #vhlo
- // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-@@ -413,8 +448,9 @@
- }
-
- // CHECK-LABEL: "default_convolution"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x6x6x16xf32> {
-- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>,
-@@ -442,8 +478,9 @@
- }
-
- // CHECK-LABEL: "default_custom_call"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_custom_call(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.custom_call_v1"(%arg0) <{
-+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{
- // CHECK-SAME: api_version = #vhlo,
- // CHECK-SAME: backend_config = #vhlo.string_v1<"">,
- // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">,
-@@ -460,8 +497,9 @@
- }
-
- // CHECK-LABEL: "default_dot_general"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> {
-- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>,
-@@ -480,8 +518,9 @@
- }
-
- // CHECK-LABEL: "default_dot"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
-- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1>
- %0 = "stablehlo.dot"(%arg0, %arg1) : (tensor<8x16xf32>, tensor<16x8xf32>) -> tensor<8x8xf32>
-@@ -489,8 +528,9 @@
- }
-
- // CHECK-LABEL: "default_dynamic_broadcast_in_dim"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor {
-- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>,
- // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>>,
- // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>>
-@@ -502,8 +542,9 @@
- }
-
- // CHECK-LABEL: "default_dynamic_conv"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @default_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> {
-- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>,
-@@ -531,8 +572,9 @@
- }
-
- // CHECK-LABEL: "default_dynamic_gather"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @default_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> {
-- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
- // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>,
- // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
-@@ -558,15 +600,16 @@
- // CHECK-SAME: sym_name = #vhlo.string_v1<"default_func">,
- // CHECK-SAME: sym_visibility = #vhlo.string_v1<"">
- // CHECK-SAME: }> ({
-- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1):
-- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> ()
-+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1):
-+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }) : () -> ()
- func.return %arg0 : tensor
- }
-
- // CHECK-LABEL: "dynamic_gather"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> {
-- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
- // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>,
- // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
-@@ -587,8 +630,9 @@
- }
-
- // CHECK-LABEL: "default_infeed"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) {
-- // CHECK: "vhlo.infeed_v1"(%arg0) <{
-+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{
- // CHECK-SAME: infeed_config = #vhlo.string_v1<"">,
- // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[]>
- // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1)
-@@ -597,8 +641,9 @@
- }
-
- // CHECK-LABEL: "default_outfeed"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: outfeed_config = #vhlo.string_v1<"">
- // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1
- %0 = "stablehlo.outfeed"(%arg0, %arg1) : (tensor, !stablehlo.token) -> !stablehlo.token
-@@ -606,8 +651,9 @@
- }
-
- // CHECK-LABEL: "default_recv"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) {
-- // CHECK: "vhlo.recv_v1"(%arg0) <{
-+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- //
CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -619,8 +665,9 @@ - } - - // CHECK-LABEL: "default_send" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -632,8 +679,9 @@ - } - - // CHECK-LABEL: "default_reduce_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64> -@@ -655,8 +703,9 @@ - } - - // CHECK-LABEL: "default_reduce_window" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x16x30x7xf32> { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, -@@ -678,8 +727,9 @@ - } - - // CHECK-LABEL: "default_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> { -- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, - // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>, -@@ -707,8 +757,9 @@ - } - - // CHECK-LABEL: "default_select_and_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<10x23x23x64xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> { -- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>> -@@ -736,8 +787,9 @@ - } - - // CHECK-LABEL: "default_sort" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.sort_v1"(%arg0) <{ -+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<-1 : i64> - // CHECK-SAME: is_stable = #vhlo.bool_v1 - // CHECK-SAME: }> ({ -@@ -756,29 +808,33 @@ - // ============ OPS ============ - - // CHECK-LABEL: "op_abs" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_abs(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : 
(!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_add" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_add(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_after_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_after_all(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.after_all_v1"(%arg0) : (!vhlo.token_v1) -> !vhlo.token_v1 -+ // CHECK: "vhlo.after_all_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.after_all"(%arg0) : (!stablehlo.token) -> !stablehlo.token - func.return %0 : !stablehlo.token - } - - // CHECK-LABEL: "op_all_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.all_gather_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{ - // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64> - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -794,8 +850,9 @@ - } - - // CHECK-LABEL: "op_all_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_reduce(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.all_reduce_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: use_global_device_ids = #vhlo.bool_v1 -@@ -817,8 +874,9 @@ - } - - // CHECK-LABEL: "op_all_to_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> { -- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>, -@@ -836,22 +894,25 @@ - } - - // CHECK-LABEL: "op_and" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_and(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_atan2" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_atan2(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.atan2_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.atan2_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.atan2"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_batch_norm_grad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_batch_norm_grad(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, 
%arg3: tensor<16xf32>, %arg4: tensor<16x16x16x16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { -- // CHECK: "vhlo.batch_norm_grad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ -+ // CHECK: "vhlo.batch_norm_grad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -@@ -863,8 +924,9 @@ - } - - // CHECK-LABEL: "op_batch_norm_inference" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_batch_norm_inference(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16xf32>) -> tensor<16x16x16x16xf32> { -- // CHECK: "vhlo.batch_norm_inference_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ -+ // CHECK: "vhlo.batch_norm_inference_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1> -@@ -876,8 +938,9 @@ - } - - // CHECK-LABEL: "op_batch_norm_training" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_batch_norm_training(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { -- // CHECK: "vhlo.batch_norm_training_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.batch_norm_training_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -@@ -889,15 +952,17 @@ - } - - // CHECK-LABEL: "op_bitcast_convert" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_bitcast_convert(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.bitcast_convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.bitcast_convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.bitcast_convert"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_broadcast_in_dim(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.broadcast_in_dim_v1"(%arg0) <{ -+ // CHECK: "vhlo.broadcast_in_dim_v1"(%[[ARG0]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1> - %0 = "stablehlo.broadcast_in_dim"(%arg0) { -@@ -907,8 +972,9 @@ - } - - // CHECK-LABEL: "op_broadcast" -+// CHECK-NEXT: 
(%[[ARG0:.*]]: {{.*}}) - func.func @op_broadcast(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.broadcast_v1"(%arg0) <{ -+ // CHECK: "vhlo.broadcast_v1"(%[[ARG0]]) <{ - // CHECK-SAME: broadcast_sizes = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1> - %0 = "stablehlo.broadcast"(%arg0) { -@@ -918,9 +984,10 @@ - } - - // CHECK-LABEL: "op_case" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_case(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.case_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.case"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -929,22 +996,25 @@ - } - - // CHECK-LABEL: "op_cbrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cbrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cbrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.cbrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cbrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_ceil" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_ceil(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.ceil_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.ceil_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.ceil"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_cholesky" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> { -- // CHECK: "vhlo.cholesky_v1"(%arg0) <{ -+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{ - // CHECK-SAME: lower = #vhlo.bool_v1 - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1> - %0 = "stablehlo.cholesky"(%arg0) { -@@ -954,22 +1024,25 @@ - } - - // CHECK-LABEL: "op_clamp" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_clamp(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.clamp_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.clamp_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.clamp"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_count_leading_zeros" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_count_leading_zeros(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.count_leading_zeros_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.count_leading_zeros_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.count_leading_zeros"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_collective_permute" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: "vhlo.collective_permute_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>> - // CHECK-SAME: }> : 
(!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -981,8 +1054,9 @@ - } - - // CHECK-LABEL: "op_compare" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_compare(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: compare_type = #vhlo, - // CHECK-SAME: comparison_direction = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -994,15 +1068,17 @@ - } - - // CHECK-LABEL: "op_complex" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_complex(%arg0: tensor, %arg1: tensor) -> tensor> { -- // CHECK: "vhlo.complex_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.complex_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1> - %0 = "stablehlo.complex"(%arg0, %arg1) : (tensor, tensor) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "op_concatenate" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_concatenate(%arg0: tensor<8xf32>, %arg1: tensor<8xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.concatenate_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.concatenate_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x!vhlo.f32_v1>, !vhlo.tensor_v1<8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.concatenate"(%arg0, %arg1) { -@@ -1012,6 +1088,7 @@ - } - - // CHECK-LABEL: "op_constant" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_constant(%arg0: tensor) -> tensor { - // CHECK: "vhlo.constant_v1"() <{ - // CHECK-SAME: value = #vhlo.tensor_v1 : tensor> -@@ -1023,15 +1100,17 @@ - } - - // CHECK-LABEL: "op_convert" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_convert(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.convert"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_convolution" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x7x7x16xf32> { -- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -1065,8 +1144,9 @@ - } - - // CHECK-LABEL: "op_cosine" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cosine(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cosine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.cosine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cosine"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } -@@ -1079,8 +1159,9 @@ - } - - // CHECK-LABEL: "op_cross_replica_sum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cross_replica_sum(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cross-replica-sum_v1"(%arg0) <{ -+ // CHECK: "vhlo.cross-replica-sum_v1"(%[[ARG0]]) <{ - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = 
"stablehlo.cross-replica-sum"(%arg0) { -@@ -1090,8 +1171,9 @@ - } - - // CHECK-LABEL: "op_custom_call" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_custom_call(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.custom_call_v1"(%arg0) <{ -+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{ - // CHECK-SAME: api_version = #vhlo, - // CHECK-SAME: backend_config = #vhlo.string_v1<"\08\03\1A\02">, - // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">, -@@ -1122,15 +1204,17 @@ - } - - // CHECK-LABEL: "op_divide" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_divide(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.divide_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.divide_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.divide"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dot_general" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> { -- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, -@@ -1150,8 +1234,9 @@ - } - - // CHECK-LABEL: "op_dot" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.dot"(%arg0, %arg1) { -@@ -1161,8 +1246,9 @@ - } - - // CHECK-LABEL: "op_dynamic_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1176,8 +1262,9 @@ - } - - // CHECK-LABEL: "op_dynamic_conv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> { -- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -1211,8 +1298,9 @@ - } - - // CHECK-LABEL: "op_dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> { -- // CHECK: 
"vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -1232,8 +1320,9 @@ - } - - // CHECK-LABEL: "op_dynamic_iota" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_dynamic_iota(%arg0: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_iota_v1"(%arg0) <{ -+ // CHECK: "vhlo.dynamic_iota_v1"(%[[ARG0]]) <{ - // CHECK-SAME: iota_dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_iota"(%arg0) { -@@ -1243,22 +1332,25 @@ - } - - // CHECK-LABEL: "op_dynamic_pad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_dynamic_pad(%arg0: tensor, %arg1: tensor, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>, %arg4: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_pad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.dynamic_pad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_pad"(%arg0, %arg1, %arg2, %arg3, %arg4) : (tensor, tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dynamic_reshape" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_reshape(%arg0: tensor<16xf32>, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_reshape_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.dynamic_reshape_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_reshape"(%arg0, %arg1) : (tensor<16xf32>, tensor<2xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dynamic_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_slice(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor<4xf32> { -- // CHECK: "vhlo.dynamic_slice_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_slice_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: slice_sizes = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<4x!vhlo.f32_v1> - %0 = "stablehlo.dynamic_slice"(%arg0, %arg1) { -@@ -1268,15 +1360,17 @@ - } - - // CHECK-LABEL: "op_dynamic_update_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_update_slice(%arg0: tensor<16xf32>, %arg1: tensor<4xf32>, %arg2: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.dynamic_update_slice_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> -+ // CHECK: "vhlo.dynamic_update_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> 
!vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.dynamic_update_slice"(%arg0, %arg1, %arg2) : (tensor<16xf32>, tensor<4xf32>, tensor) -> tensor<16xf32> - func.return %0 : tensor<16xf32> - } - - // CHECK-LABEL: "op_einsum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_einsum(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.einsum_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.einsum_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab,bc->ac"> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.einsum"(%arg0, %arg1) { -@@ -1286,22 +1380,25 @@ - } - - // CHECK-LABEL: "op_exponential_minus_one" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_exponential_minus_one(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.exponential_minus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.exponential_minus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.exponential_minus_one"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_exponential" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_exponential(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.exponential_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.exponential_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.exponential"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_fft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { -- // CHECK: "vhlo.fft_v1"(%arg0) <{ -+ // CHECK: "vhlo.fft_v1"(%[[ARG0]]) <{ - // CHECK-SAME: fft_length = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: fft_type = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.complex_v1>) -> !vhlo.tensor_v1<16x!vhlo.complex_v1> -@@ -1313,8 +1410,9 @@ - } - - // CHECK-LABEL: "op_floor" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_floor(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.floor_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.floor_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.floor"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } -@@ -1327,16 +1425,17 @@ - // CHECK-SAME: sym_name = #vhlo.string_v1<"op_func">, - // CHECK-SAME: sym_visibility = #vhlo.string_v1<"private"> - // CHECK-SAME: }> ({ -- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1): -- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1): -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : () -> () - - func.return %arg0 : tensor - } - - // CHECK-LABEL: "op_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> { -- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -1358,8 +1457,9 @@ - } - - // CHECK-LABEL: "op_get_dimension_size" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_get_dimension_size(%arg0: tensor) -> tensor { -- // CHECK: 
"vhlo.get_dimension_size_v1"(%arg0) <{ -+ // CHECK: "vhlo.get_dimension_size_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.get_dimension_size"(%arg0) { -@@ -1369,8 +1469,9 @@ - } - - // CHECK-LABEL: "op_get_tuple_element" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_get_tuple_element(%arg0: tuple, tensor>) -> tensor { -- // CHECK: "vhlo.get_tuple_element_v1"(%arg0) <{ -+ // CHECK: "vhlo.get_tuple_element_v1"(%[[ARG0]]) <{ - // CHECK-SAME: index = #vhlo.integer_v1<0 : i32> - // CHECK-SAME: }> : (!vhlo.tuple_v1, !vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.get_tuple_element"(%arg0) { -@@ -1380,11 +1481,12 @@ - } - - // CHECK-LABEL: "op_if" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_if(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.if_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.if_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }, { -- // CHECK-NEXT: "vhlo.return_v1"(%arg2) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG2]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.if"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -1395,15 +1497,17 @@ - } - - // CHECK-LABEL: "op_imag" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_imag(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.imag_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.imag_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.imag"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_infeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.infeed_v1"(%arg0) <{ -+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{ - // CHECK-SAME: infeed_config = #vhlo.string_v1<"foo">, - // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[#vhlo.array_v1<[]>]> - // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) -@@ -1426,36 +1530,41 @@ - } - - // CHECK-LABEL: "op_is_finite" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_is_finite(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.is_finite_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.is_finite_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.is_finite"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_log" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_log(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.log_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.log_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.log"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_log_plus_one" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_log_plus_one(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.log_plus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.log_plus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.log_plus_one"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_logistic" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_logistic(%arg0: tensor) -> tensor { -- // CHECK: 
"vhlo.logistic_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.logistic_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.logistic"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_map" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_map(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.map_v1"(%arg0) <{ -+ // CHECK: "vhlo.map_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> ({ - // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1): -@@ -1473,57 +1582,65 @@ - } - - // CHECK-LABEL: "op_maximum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_maximum(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.maximum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.maximum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.maximum"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_minimum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_minimum(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.minimum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.minimum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.minimum"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_multiply" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_multiply(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.multiply_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.multiply_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.multiply"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_negate" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_negate(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.negate_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.negate_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.negate"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_not" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_not(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.not_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.not_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.not"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_optimization_barrier" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_optimization_barrier(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.optimization_barrier_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.optimization_barrier_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.optimization_barrier"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_or" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_or(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.or_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.or_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = 
"stablehlo.or"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_outfeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: outfeed_config = #vhlo.string_v1<"foo"> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.outfeed"(%arg0, %arg1) { -@@ -1533,8 +1650,9 @@ - } - - // CHECK-LABEL: "op_pad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_pad(%arg0: tensor<8xf32>, %arg1: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.pad_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.pad_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: edge_padding_high = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: edge_padding_low = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: interior_padding = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1548,36 +1666,41 @@ - } - - // CHECK-LABEL: "op_popcnt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_popcnt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.popcnt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.popcnt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.popcnt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_power" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_power(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.power_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.power_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.power"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_real_dynamic_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}) - func.func @op_real_dynamic_slice(%arg0: tensor, %arg1: tensor<1xindex>, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.real_dynamic_slice_v1"(%arg0, %arg1, %arg2, %arg3) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.real_dynamic_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.real_dynamic_slice"(%arg0, %arg1, %arg2, %arg3) : (tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_real" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_real(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.real_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.real_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.real"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_recv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.recv_v1"(%arg0) <{ -+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<3 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ 
-1590,8 +1713,9 @@ - } - - // CHECK-LABEL: "op_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_reduce(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.reduce_v1"(%arg0, %arg1) -+ // CHECK: "vhlo.reduce_v1"(%[[ARG0]], %[[ARG1]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -1606,8 +1730,9 @@ - } - - // CHECK-LABEL: "op_reduce_precision" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reduce_precision(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.reduce_precision_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_precision_v1"(%[[ARG0]]) <{ - // CHECK-SAME: exponent_bits = #vhlo.integer_v1<8 : i32> - // CHECK-SAME: mantissa_bits = #vhlo.integer_v1<10 : i32> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -1619,8 +1744,9 @@ - } - - // CHECK-LABEL: "op_reduce_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64> -@@ -1644,8 +1770,9 @@ - } - - // CHECK-LABEL: "op_reduce_window" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x9x16x7xf32> { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, -@@ -1671,8 +1798,9 @@ - } - - // CHECK-LABEL: "op_remainder" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_remainder(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.remainder_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.remainder_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.remainder"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } -@@ -1692,16 +1820,18 @@ - } - - // CHECK-LABEL: "op_reshape" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reshape(%arg0: tensor<16xf32>) -> tensor<4x4xf32> { -- // CHECK: "vhlo.reshape_v1"(%arg0) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1> -+ // CHECK: "vhlo.reshape_v1"(%[[ARG0]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1> - %0 = "stablehlo.reshape"(%arg0) : (tensor<16xf32>) -> tensor<4x4xf32> - func.return %0 : tensor<4x4xf32> - } - - // CHECK-LABEL: "op_return" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_return(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.case_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.case"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> 
() -@@ -1710,8 +1840,9 @@ - } - - // CHECK-LABEL: "op_reverse" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reverse(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reverse_v1"(%arg0) <{ -+ // CHECK: "vhlo.reverse_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.reverse"(%arg0) { -@@ -1721,8 +1852,9 @@ - } - - // CHECK-LABEL: "op_rng_bit_generator" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_rng_bit_generator(%arg0: tensor) -> (tensor, tensor) { -- // CHECK: "vhlo.rng_bit_generator_v1"(%arg0) <{ -+ // CHECK: "vhlo.rng_bit_generator_v1"(%[[ARG0]]) <{ - // CHECK-SAME: rng_algorithm = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> (!vhlo.tensor_v1, !vhlo.tensor_v1) - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { -@@ -1732,8 +1864,9 @@ - } - - // CHECK-LABEL: "op_rng" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_rng(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { -- // CHECK: "vhlo.rng_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.rng_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: rng_distribution = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<0x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) { -@@ -1743,29 +1876,33 @@ - } - - // CHECK-LABEL: "op_round_nearest_afz" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_round_nearest_afz(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.round_nearest_afz_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.round_nearest_afz_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.round_nearest_afz"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_round_nearest_even" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_round_nearest_even(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.round_nearest_even_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.round_nearest_even_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.round_nearest_even"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_rsqrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_rsqrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.rsqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.rsqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.rsqrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> { -- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, - // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>, -@@ -1795,8 +1932,9 @@ - } - - // CHECK-LABEL: "op_select_and_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<12x13x13x66xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> { -- // CHECK: 
"vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>> -@@ -1826,15 +1964,17 @@ - } - - // CHECK-LABEL: "op_select" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_select(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.select_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.select_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.select"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_send" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -1847,8 +1987,9 @@ - } - - // CHECK-LABEL: "op_set_dimension_size" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_set_dimension_size(%arg0: tensor, %arg1: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.set_dimension_size_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.set_dimension_size_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.set_dimension_size"(%arg0, %arg1) { -@@ -1858,43 +1999,49 @@ - } - - // CHECK-LABEL: "op_shift_left" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_left(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.shift_left_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_left_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_left"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_shift_right_arithmetic" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_right_arithmetic(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.shift_right_arithmetic_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_right_arithmetic_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_right_arithmetic"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_shift_right_logical" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_right_logical(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.shift_right_logical_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_right_logical_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_right_logical"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_sign" -+// CHECK-NEXT: 
(%[[ARG0:.*]]: {{.*}}) - func.func @op_sign(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.sign_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.sign_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.sign"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_sine" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sine(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.sine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.sine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.sine"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_slice(%arg0: tensor<16xf32>) -> tensor<4xf32> { -- // CHECK: "vhlo.slice_v1"(%arg0) <{ -+ // CHECK: "vhlo.slice_v1"(%[[ARG0]]) <{ - // CHECK-SAME: limit_indices = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: start_indices = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: strides = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1908,8 +2055,9 @@ - } - - // CHECK-LABEL: "op_sort" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.sort_v1"(%arg0) <{ -+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: is_stable = #vhlo.bool_v1 - // CHECK-SAME: }> ({ -@@ -1929,29 +2077,33 @@ - } - - // CHECK-LABEL: "op_sqrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sqrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.sqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.sqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.sqrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_subtract" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_subtract(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.subtract_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.subtract_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.subtract"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_tanh" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_tanh(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.tanh_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.tanh_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.tanh"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_torch_index_select" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_torch_index_select(%arg0: tensor<5x1x5xf32>, %arg1: tensor<2xi32>) -> tensor<2x1x5xf32> { -- // CHECK: "vhlo.torch_index_select_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.torch_index_select_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_dims = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: dim = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<5x1x5x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.i32_v1>) -> !vhlo.tensor_v1<2x1x5x!vhlo.f32_v1> -@@ -1963,8 +2115,9 @@ - } - - // CHECK-LABEL: "op_trace" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_trace(%arg0: tensor) { -- // CHECK: "vhlo.trace_v1"(%arg0) <{ -+ // CHECK: "vhlo.trace_v1"(%[[ARG0]]) <{ - // CHECK-SAME: tag = #vhlo.string_v1<"foo"> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> () - "stablehlo.trace"(%arg0) { 
-@@ -1974,8 +2127,9 @@
- }
-
- // CHECK-LABEL: "op_transpose"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_transpose(%arg0: tensor<16x8xf32>) -> tensor<8x16xf32> {
-- // CHECK: "vhlo.transpose_v1"(%arg0) <{
-+ // CHECK: "vhlo.transpose_v1"(%[[ARG0]]) <{
- // CHECK-SAME: permutation = #vhlo.tensor_v1<dense<[1, 0]> : tensor<2xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x16x!vhlo.f32_v1>
- %0 = "stablehlo.transpose"(%arg0) {
-@@ -1985,8 +2139,9 @@
- }
-
- // CHECK-LABEL: "op_triangular_solve"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_triangular_solve(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
-- // CHECK: "vhlo.triangular_solve_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.triangular_solve_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: left_side = #vhlo.bool_v1<true>,
- // CHECK-SAME: lower = #vhlo.bool_v1<true>,
- // CHECK-SAME: transpose_a = #vhlo<transpose NO_TRANSPOSE>,
-@@ -2002,15 +2157,17 @@
- }
-
- // CHECK-LABEL: "op_tuple"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_tuple(%arg0: tensor<f32>) -> tuple<tensor<f32>> {
-- // CHECK: "vhlo.tuple_v1"(%arg0) : (!vhlo.tensor_v1<!vhlo.f32_v1>) -> !vhlo.tuple_v1<!vhlo.tensor_v1<!vhlo.f32_v1>>
-+ // CHECK: "vhlo.tuple_v1"(%[[ARG0]]) : (!vhlo.tensor_v1<!vhlo.f32_v1>) -> !vhlo.tuple_v1<!vhlo.tensor_v1<!vhlo.f32_v1>>
- %0 = "stablehlo.tuple"(%arg0) : (tensor<f32>) -> tuple<tensor<f32>>
- func.return %0 : tuple<tensor<f32>>
- }
-
- // CHECK-LABEL: "op_unary_einsum"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_unary_einsum(%arg0: tensor<8x16xf32>) -> tensor<8xf32> {
-- // CHECK: "vhlo.unary_einsum_v1"(%arg0) <{
-+ // CHECK: "vhlo.unary_einsum_v1"(%[[ARG0]]) <{
- // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab->a">
- // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x!vhlo.f32_v1>
- %0 = "stablehlo.unary_einsum"(%arg0) {
-@@ -2020,22 +2177,25 @@
- }
-
- // CHECK-LABEL: "op_uniform_dequantize"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_uniform_dequantize(%arg0: tensor<!quant.uniform<i8:f32, 34.0:16>>) -> tensor<f32> {
-- // CHECK: "vhlo.uniform_dequantize_v1"(%arg0) : (!vhlo.tensor_v1<!vhlo.quant_v1<!vhlo.i8_v1:!vhlo.f32_v1, 3.400000e+01:16, -128:127, 1>>) -> !vhlo.tensor_v1<!vhlo.f32_v1>
-+ // CHECK: "vhlo.uniform_dequantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1<!vhlo.quant_v1<!vhlo.i8_v1:!vhlo.f32_v1, 3.400000e+01:16, -128:127, 1>>) -> !vhlo.tensor_v1<!vhlo.f32_v1>
- %0 = "stablehlo.uniform_dequantize"(%arg0) : (tensor<!quant.uniform<i8:f32, 34.0:16>>) -> tensor<f32>
- func.return %0 : tensor<f32>
- }
-
- // CHECK-LABEL: "op_uniform_quantize"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_uniform_quantize(%arg0: tensor<f32>) -> tensor<!quant.uniform<i8:f32, 34.0:16>> {
-- // CHECK: "vhlo.uniform_quantize_v1"(%arg0) : (!vhlo.tensor_v1<!vhlo.f32_v1>) -> !vhlo.tensor_v1<!vhlo.quant_v1<!vhlo.i8_v1:!vhlo.f32_v1, 3.400000e+01:16, -128:127, 1>>
-+ // CHECK: "vhlo.uniform_quantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1<!vhlo.f32_v1>) -> !vhlo.tensor_v1<!vhlo.quant_v1<!vhlo.i8_v1:!vhlo.f32_v1, 3.400000e+01:16, -128:127, 1>>
- %0 = "stablehlo.uniform_quantize"(%arg0) : (tensor<f32>) -> tensor<!quant.uniform<i8:f32, 34.0:16>>
- func.return %0 : tensor<!quant.uniform<i8:f32, 34.0:16>>
- }
-
- // CHECK-LABEL: "op_while"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_while(%arg0: tensor<i1>) -> tensor<i1> {
-- // CHECK: "vhlo.while_v1"(%arg0) ({
-+ // CHECK: "vhlo.while_v1"(%[[ARG0]]) ({
- // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1<!vhlo.i1_v1>):
- // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.i1_v1>) -> ()
- // CHECK-NEXT: }, {
-@@ -2053,8 +2213,9 @@
- }
-
- // CHECK-LABEL: "op_xor"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_xor(%arg0: tensor<i1>, %arg1: tensor<i1>) -> tensor<i1> {
-- // CHECK: "vhlo.xor_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.i1_v1>, !vhlo.tensor_v1<!vhlo.i1_v1>) -> !vhlo.tensor_v1<!vhlo.i1_v1>
-+ // CHECK: "vhlo.xor_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.i1_v1>, !vhlo.tensor_v1<!vhlo.i1_v1>) -> !vhlo.tensor_v1<!vhlo.i1_v1>
- %0 = "stablehlo.xor"(%arg0, %arg1) : (tensor<i1>, tensor<i1>) -> tensor<i1>
- func.return %0 : tensor<i1>
- }
-@@ -2062,190 +2223,217 @@
- // ============ TYPES ============
-
CHECK-LABEL: "type_i1" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i1(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i4" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i4(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i8" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i8(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i32(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i64(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui4" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui4(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui8" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui8(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = 
"stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui32(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui64(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E4M3FN" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E4M3FN(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E5M2" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E5M2(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E4M3FNUZ" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E4M3FNUZ(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E4M3B11FNUZ" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E4M3B11FNUZ(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E5M2FNUZ" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E5M2FNUZ(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : 
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f8E5M2FNUZ_v1>, !vhlo.tensor_v1<!vhlo.f8E5M2FNUZ_v1>) -> !vhlo.tensor_v1<!vhlo.f8E5M2FNUZ_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f8E5M2FNUZ>, tensor<f8E5M2FNUZ>) -> tensor<f8E5M2FNUZ>
- func.return %0 : tensor<f8E5M2FNUZ>
- }
-
- // CHECK-LABEL: "type_bf16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_bf16(%arg0: tensor<bf16>, %arg1: tensor<bf16>) -> tensor<bf16> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.bf16_v1>, !vhlo.tensor_v1<!vhlo.bf16_v1>) -> !vhlo.tensor_v1<!vhlo.bf16_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.bf16_v1>, !vhlo.tensor_v1<!vhlo.bf16_v1>) -> !vhlo.tensor_v1<!vhlo.bf16_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<bf16>, tensor<bf16>) -> tensor<bf16>
- func.return %0 : tensor<bf16>
- }
-
- // CHECK-LABEL: "type_f16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f16(%arg0: tensor<f16>, %arg1: tensor<f16>) -> tensor<f16> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f16_v1>, !vhlo.tensor_v1<!vhlo.f16_v1>) -> !vhlo.tensor_v1<!vhlo.f16_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f16_v1>, !vhlo.tensor_v1<!vhlo.f16_v1>) -> !vhlo.tensor_v1<!vhlo.f16_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f16>, tensor<f16>) -> tensor<f16>
- func.return %0 : tensor<f16>
- }
-
- // CHECK-LABEL: "type_f32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f32(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<f32> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f32_v1>, !vhlo.tensor_v1<!vhlo.f32_v1>) -> !vhlo.tensor_v1<!vhlo.f32_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f32_v1>, !vhlo.tensor_v1<!vhlo.f32_v1>) -> !vhlo.tensor_v1<!vhlo.f32_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
- func.return %0 : tensor<f32>
- }
-
- // CHECK-LABEL: "type_f64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f64(%arg0: tensor<f64>, %arg1: tensor<f64>) -> tensor<f64> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f64_v1>, !vhlo.tensor_v1<!vhlo.f64_v1>) -> !vhlo.tensor_v1<!vhlo.f64_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f64_v1>, !vhlo.tensor_v1<!vhlo.f64_v1>) -> !vhlo.tensor_v1<!vhlo.f64_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f64>, tensor<f64>) -> tensor<f64>
- func.return %0 : tensor<f64>
- }
-
- // CHECK-LABEL: "type_complex_f32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_complex_f32(%arg0: tensor<complex<f32>>, %arg1: tensor<complex<f32>>) -> tensor<complex<f32>> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f32_v1>>, !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f32_v1>>) -> !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f32_v1>>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f32_v1>>, !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f32_v1>>) -> !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f32_v1>>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<complex<f32>>, tensor<complex<f32>>) -> tensor<complex<f32>>
- func.return %0 : tensor<complex<f32>>
- }
-
- // CHECK-LABEL: "type_complex_f64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_complex_f64(%arg0: tensor<complex<f64>>, %arg1: tensor<complex<f64>>) -> tensor<complex<f64>> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f64_v1>>, !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f64_v1>>) -> !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f64_v1>>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f64_v1>>, !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f64_v1>>) -> !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f64_v1>>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<complex<f64>>, tensor<complex<f64>>) -> tensor<complex<f64>>
- func.return %0 : tensor<complex<f64>>
- }
-
- // CHECK-LABEL: "type_dynamism_ranked"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_dynamism_ranked(%arg0: tensor<?xf32>) -> tensor<?xf32> {
-- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1<?x!vhlo.f32_v1>) -> !vhlo.tensor_v1<?x!vhlo.f32_v1>
-+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1<?x!vhlo.f32_v1>) -> !vhlo.tensor_v1<?x!vhlo.f32_v1>
- %0 = "stablehlo.abs"(%arg0) : (tensor<?xf32>) -> tensor<?xf32>
- func.return %0 : tensor<?xf32>
- }
-
- // CHECK-LABEL: "type_quantization"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_quantization(%arg0: tensor<!quant.uniform<i8:f32, 34.0:16>>, %arg1: tensor<!quant.uniform<i8:f32, 34.0:16>>) -> tensor<!quant.uniform<i8:f32, 34.0:16>> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.quant_v1<!vhlo.i8_v1:!vhlo.f32_v1, 3.400000e+01:16, -128:127, 1>>, !vhlo.tensor_v1<!vhlo.quant_v1<!vhlo.i8_v1:!vhlo.f32_v1, 3.400000e+01:16, -128:127, 1>>) -> !vhlo.tensor_v1<!vhlo.quant_v1<!vhlo.i8_v1:!vhlo.f32_v1, 3.400000e+01:16, -128:127, 1>>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.quant_v1<!vhlo.i8_v1:!vhlo.f32_v1, 3.400000e+01:16, -128:127, 1>>, !vhlo.tensor_v1<!vhlo.quant_v1<!vhlo.i8_v1:!vhlo.f32_v1, 3.400000e+01:16, -128:127, 1>>) -> !vhlo.tensor_v1<!vhlo.quant_v1<!vhlo.i8_v1:!vhlo.f32_v1, 3.400000e+01:16, -128:127, 1>>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<!quant.uniform<i8:f32, 34.0:16>>, tensor<!quant.uniform<i8:f32, 34.0:16>>) -> tensor<!quant.uniform<i8:f32, 34.0:16>>
- func.return %0 : tensor<!quant.uniform<i8:f32, 34.0:16>>
- }
-
- // CHECK: function_type = #vhlo.type_v1<!vhlo.func_v1<(!vhlo.token_v1) -> !vhlo.token_v1>>
- // CHECK-LABEL: "type_token_callee"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_token_callee(%arg0: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.token_v1) -> ()
-+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> ()
- return %arg0 : !stablehlo.token
- }
-
- // CHECK: function_type = #vhlo.type_v1<!vhlo.func_v1<(!vhlo.token_v1) -> !vhlo.token_v1>>
- // CHECK-LABEL: "type_token_caller"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_token_caller(%arg0: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.call_v1"(%arg0) <{callee = #vhlo.string_v1<"type_token_callee">}
-+ // CHECK: "vhlo.call_v1"(%[[ARG0]]) <{callee = #vhlo.string_v1<"type_token_callee">}
- // CHECK-SAME: (!vhlo.token_v1) -> !vhlo.token_v1
- %0 = func.call @type_token_callee(%arg0) : (!stablehlo.token) -> !stablehlo.token
- return %0 : !stablehlo.token
- }
-
- // CHECK-LABEL: "type_tuple"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_tuple(%arg0: tuple<tensor<f32>>) -> tuple<!stablehlo.token> {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo"
-diff --ruN a/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_12_0.mlir b/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_12_0.mlir
---- stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_12_0.mlir
-+++ stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_12_0.mlir
-@@ -13,6 +13,7 @@
- // ============ ATTRIBUTES ============
-
- // CHECK-LABEL: "attr_comparison_direction_eq"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_eq(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<i1> {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo<comparison_direction EQ>
-@@ -22,6 +23,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_ne"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_ne(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<i1> {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo<comparison_direction NE>
-@@ -31,6 +33,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_ge"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_ge(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<i1> {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo<comparison_direction GE>
-@@ -40,6 +43,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_gt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_gt(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<i1> {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo<comparison_direction GT>
-@@ -49,6 +53,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_le"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_le(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<i1> {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo<comparison_direction LE>
-@@ -58,6 +63,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_lt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_lt(%arg0: 
tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -67,6 +73,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_notype" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_notype(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo -@@ -76,6 +83,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_float" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_float(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -86,6 +94,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_totalorder" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_totalorder(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -96,6 +105,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_signed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_signed(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -106,6 +116,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_unsigned" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_unsigned(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -118,6 +129,7 @@ - // ConvDimensionNumbers aka #stablehlo.conv is covered below. - - // CHECK-LABEL: "attr_custom_call_api_version_unspecified" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_unspecified(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -128,6 +140,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_original" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_original(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -138,6 +151,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_status_returning" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_status_returning(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -148,6 +162,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_status_returning_unified" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_status_returning_unified(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -160,6 +175,7 @@ - // DotDimensionNumbers aka #stablehlo.dot is covered below. 
- - // CHECK-LABEL: "attr_fft_type_fft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -170,6 +186,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_ifft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_ifft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -180,6 +197,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_rfft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_rfft(%arg0: tensor<16xf32>) -> tensor<9xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -190,6 +208,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_irfft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_irfft(%arg0: tensor<9xcomplex>) -> tensor<16xf32> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -202,6 +221,7 @@ - // GatherDimensionNumbers aka #stablehlo.gather is covered below. - - // CHECK-LABEL: "attr_precision_config_default" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_default(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -210,6 +230,7 @@ - } - - // CHECK-LABEL: "attr_precision_config_high" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_high(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -219,6 +240,7 @@ - } - - // CHECK-LABEL: "attr_precision_config_highest" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_highest(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -228,6 +250,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_default" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_default(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -237,6 +260,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_three_fry" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_three_fry(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -246,6 +270,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_philox" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_philox(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -255,6 +280,7 @@ - } - - // CHECK-LABEL: "attr_rng_distribution_uniform" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @attr_rng_distribution_uniform(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { - %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) { - // CHECK: rng_distribution = #vhlo -@@ -264,6 +290,7 @@ - } - - // CHECK-LABEL: "attr_rng_distribution_normal" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @attr_rng_distribution_normal(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { - %0 = 
"stablehlo.rng"(%arg0, %arg1, %arg2) { - // CHECK: rng_distribution = #vhlo -@@ -275,6 +302,7 @@ - // ScatterDimensionNumbers aka #stablehlo.scatter is covered below. - - // CHECK-LABEL: "attr_transpose_no_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_no_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -287,6 +315,7 @@ - } - - // CHECK-LABEL: "attr_transpose_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -299,6 +328,7 @@ - } - - // CHECK-LABEL: "attr_transpose_adjoint" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_adjoint(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -313,10 +343,9 @@ - // TypeExtensionsAttr aka #stablehlo.type_extensions is covered below. - - // CHECK-LABEL: "attr_type_extensions_bounds" --func.func @attr_type_extensions_bounds( -- %arg0: tensor>) -- -> tensor> { -- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1>) -> () -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) -+func.func @attr_type_extensions_bounds(%arg0: tensor>) -> tensor> { -+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> () - func.return %arg0 : tensor> - } - -@@ -324,8 +353,9 @@ - // ============ DEFAULTS ============ - - // CHECK-LABEL: "default_all_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.all_gather_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{ - // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64> - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -339,8 +369,9 @@ - } - - // CHECK-LABEL: "default_all_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_reduce(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.all_reduce_v1"(%arg0) -+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]]) - // CHECK-SAME: <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -362,8 +393,9 @@ - } - - // CHECK-LABEL: "default_all_to_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> { -- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>, -@@ -380,8 +412,9 @@ - } - - // CHECK-LABEL: "default_cholesky" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> { -- // CHECK: "vhlo.cholesky_v1"(%arg0) <{ -+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{ - // CHECK-SAME: lower = #vhlo.bool_v1 - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1> - %0 = "stablehlo.cholesky"(%arg0) : (tensor<1x16x16xf32>) -> tensor<1x16x16xf32> -@@ -389,8 +422,9 @@ - } - - // CHECK-LABEL: 
"default_collective_permute" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: "vhlo.collective_permute_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -401,8 +435,9 @@ - } - - // CHECK-LABEL: "default_compare" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_compare(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: compare_type = #vhlo, - // CHECK-SAME: comparison_direction = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -413,8 +448,9 @@ - } - - // CHECK-LABEL: "default_convolution" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x6x6x16xf32> { -- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -442,8 +478,9 @@ - } - - // CHECK-LABEL: "default_custom_call" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_custom_call(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.custom_call_v1"(%arg0) <{ -+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{ - // CHECK-SAME: api_version = #vhlo, - // CHECK-SAME: backend_config = #vhlo.string_v1<"">, - // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">, -@@ -460,8 +497,9 @@ - } - - // CHECK-LABEL: "default_dot_general" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> { -- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, -@@ -480,8 +518,9 @@ - } - - // CHECK-LABEL: "default_dot" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.dot"(%arg0, %arg1) : (tensor<8x16xf32>, tensor<16x8xf32>) -> tensor<8x8xf32> -@@ -489,8 +528,9 @@ - } - - // CHECK-LABEL: "default_dynamic_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : 
tensor<2xi64>>, - // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>>, - // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>> -@@ -502,8 +542,9 @@ - } - - // CHECK-LABEL: "default_dynamic_conv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> { -- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -531,8 +572,9 @@ - } - - // CHECK-LABEL: "default_dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> { -- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -558,15 +600,16 @@ - // CHECK-SAME: sym_name = #vhlo.string_v1<"default_func">, - // CHECK-SAME: sym_visibility = #vhlo.string_v1<""> - // CHECK-SAME: }> ({ -- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1): -- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1): -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : () -> () - func.return %arg0 : tensor - } - - // CHECK-LABEL: "dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> { -- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -587,8 +630,9 @@ - } - - // CHECK-LABEL: "default_infeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.infeed_v1"(%arg0) <{ -+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{ - // CHECK-SAME: infeed_config = #vhlo.string_v1<"">, - // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[]> - // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) -@@ -597,8 +641,9 @@ - } - - // CHECK-LABEL: "default_outfeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: outfeed_config = #vhlo.string_v1<""> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.outfeed"(%arg0, %arg1) : (tensor, !stablehlo.token) -> !stablehlo.token -@@ -606,8 +651,9 @@ - } - - // CHECK-LABEL: "default_recv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_recv(%arg0: !stablehlo.token) -> (tensor, 
!stablehlo.token) { -- // CHECK: "vhlo.recv_v1"(%arg0) <{ -+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -619,8 +665,9 @@ - } - - // CHECK-LABEL: "default_send" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -632,8 +679,9 @@ - } - - // CHECK-LABEL: "default_reduce_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64> -@@ -655,8 +703,9 @@ - } - - // CHECK-LABEL: "default_reduce_window" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x16x30x7xf32> { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, -@@ -678,8 +727,9 @@ - } - - // CHECK-LABEL: "default_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> { -- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, - // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>, -@@ -707,8 +757,9 @@ - } - - // CHECK-LABEL: "default_select_and_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<10x23x23x64xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> { -- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>> -@@ -736,8 +787,9 @@ - } - - // CHECK-LABEL: "default_sort" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.sort_v1"(%arg0) <{ -+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<-1 : i64> - // CHECK-SAME: is_stable = #vhlo.bool_v1 - // CHECK-SAME: }> ({ -@@ -756,29 +808,33 @@ - // ============ OPS ============ - - // CHECK-LABEL: "op_abs" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func 
@op_abs(%arg0: tensor<f32>) -> tensor<f32> {
-- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1<!vhlo.f32_v1>) -> !vhlo.tensor_v1<!vhlo.f32_v1>
-+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1<!vhlo.f32_v1>) -> !vhlo.tensor_v1<!vhlo.f32_v1>
- %0 = "stablehlo.abs"(%arg0) : (tensor<f32>) -> tensor<f32>
- func.return %0 : tensor<f32>
- }
-
- // CHECK-LABEL: "op_add"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_add(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<f32> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f32_v1>, !vhlo.tensor_v1<!vhlo.f32_v1>) -> !vhlo.tensor_v1<!vhlo.f32_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f32_v1>, !vhlo.tensor_v1<!vhlo.f32_v1>) -> !vhlo.tensor_v1<!vhlo.f32_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
- func.return %0 : tensor<f32>
- }
-
- // CHECK-LABEL: "op_after_all"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_after_all(%arg0: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.after_all_v1"(%arg0) : (!vhlo.token_v1) -> !vhlo.token_v1
-+ // CHECK: "vhlo.after_all_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> !vhlo.token_v1
- %0 = "stablehlo.after_all"(%arg0) : (!stablehlo.token) -> !stablehlo.token
- func.return %0 : !stablehlo.token
- }
-
- // CHECK-LABEL: "op_all_gather"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> {
-- // CHECK: "vhlo.all_gather_v1"(%arg0) <{
-+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{
- // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64>
- // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1<dense<[[0], [1]]> : tensor<2x1xi64>>,
-@@ -794,8 +850,9 @@
- }
-
- // CHECK-LABEL: "op_all_reduce"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_all_reduce(%arg0: tensor<f32>) -> tensor<f32> {
-- // CHECK: "vhlo.all_reduce_v1"(%arg0) <{
-+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1<dense<[[0], [1]]> : tensor<2x1xi64>>,
- // CHECK-SAME: use_global_device_ids = #vhlo.bool_v1<true>
-@@ -817,8 +874,9 @@
- }
-
- // CHECK-LABEL: "op_all_to_all"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> {
-- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{
-+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1<dense<[[0, 1, 2, 3]]> : tensor<1x4xi64>>,
-@@ -836,22 +894,25 @@
- }
-
- // CHECK-LABEL: "op_and"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_and(%arg0: tensor<i1>, %arg1: tensor<i1>) -> tensor<i1> {
-- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.i1_v1>, !vhlo.tensor_v1<!vhlo.i1_v1>) -> !vhlo.tensor_v1<!vhlo.i1_v1>
-+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.i1_v1>, !vhlo.tensor_v1<!vhlo.i1_v1>) -> !vhlo.tensor_v1<!vhlo.i1_v1>
- %0 = "stablehlo.and"(%arg0, %arg1) : (tensor<i1>, tensor<i1>) -> tensor<i1>
- func.return %0 : tensor<i1>
- }
-
- // CHECK-LABEL: "op_atan2"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_atan2(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<f32> {
-- // CHECK: "vhlo.atan2_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f32_v1>, !vhlo.tensor_v1<!vhlo.f32_v1>) -> !vhlo.tensor_v1<!vhlo.f32_v1>
-+ // CHECK: "vhlo.atan2_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f32_v1>, !vhlo.tensor_v1<!vhlo.f32_v1>) -> !vhlo.tensor_v1<!vhlo.f32_v1>
- %0 = "stablehlo.atan2"(%arg0, %arg1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
- func.return %0 : tensor<f32>
- }
-
- // CHECK-LABEL: "op_batch_norm_grad"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, 
%[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_batch_norm_grad(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16x16x16x16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { -- // CHECK: "vhlo.batch_norm_grad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ -+ // CHECK: "vhlo.batch_norm_grad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -@@ -863,8 +924,9 @@ - } - - // CHECK-LABEL: "op_batch_norm_inference" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_batch_norm_inference(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16xf32>) -> tensor<16x16x16x16xf32> { -- // CHECK: "vhlo.batch_norm_inference_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ -+ // CHECK: "vhlo.batch_norm_inference_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1> -@@ -876,8 +938,9 @@ - } - - // CHECK-LABEL: "op_batch_norm_training" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_batch_norm_training(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { -- // CHECK: "vhlo.batch_norm_training_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.batch_norm_training_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -@@ -889,15 +952,17 @@ - } - - // CHECK-LABEL: "op_bitcast_convert" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_bitcast_convert(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.bitcast_convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.bitcast_convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.bitcast_convert"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_broadcast_in_dim(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.broadcast_in_dim_v1"(%arg0) <{ -+ // CHECK: "vhlo.broadcast_in_dim_v1"(%[[ARG0]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> 
!vhlo.tensor_v1<16x16x!vhlo.f32_v1> - %0 = "stablehlo.broadcast_in_dim"(%arg0) { -@@ -907,8 +972,9 @@ - } - - // CHECK-LABEL: "op_broadcast" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_broadcast(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.broadcast_v1"(%arg0) <{ -+ // CHECK: "vhlo.broadcast_v1"(%[[ARG0]]) <{ - // CHECK-SAME: broadcast_sizes = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1> - %0 = "stablehlo.broadcast"(%arg0) { -@@ -918,9 +984,10 @@ - } - - // CHECK-LABEL: "op_case" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_case(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.case_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.case"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -929,22 +996,25 @@ - } - - // CHECK-LABEL: "op_cbrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cbrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cbrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.cbrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cbrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_ceil" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_ceil(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.ceil_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.ceil_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.ceil"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_cholesky" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> { -- // CHECK: "vhlo.cholesky_v1"(%arg0) <{ -+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{ - // CHECK-SAME: lower = #vhlo.bool_v1 - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1> - %0 = "stablehlo.cholesky"(%arg0) { -@@ -954,22 +1024,25 @@ - } - - // CHECK-LABEL: "op_clamp" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_clamp(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.clamp_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.clamp_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.clamp"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_count_leading_zeros" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_count_leading_zeros(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.count_leading_zeros_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.count_leading_zeros_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.count_leading_zeros"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_collective_permute" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: "vhlo.collective_permute_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{ - // CHECK-SAME: 
channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -981,8 +1054,9 @@ - } - - // CHECK-LABEL: "op_compare" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_compare(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: compare_type = #vhlo, - // CHECK-SAME: comparison_direction = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -994,15 +1068,17 @@ - } - - // CHECK-LABEL: "op_complex" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_complex(%arg0: tensor, %arg1: tensor) -> tensor> { -- // CHECK: "vhlo.complex_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.complex_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1> - %0 = "stablehlo.complex"(%arg0, %arg1) : (tensor, tensor) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "op_concatenate" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_concatenate(%arg0: tensor<8xf32>, %arg1: tensor<8xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.concatenate_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.concatenate_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x!vhlo.f32_v1>, !vhlo.tensor_v1<8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.concatenate"(%arg0, %arg1) { -@@ -1012,6 +1088,7 @@ - } - - // CHECK-LABEL: "op_constant" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_constant(%arg0: tensor) -> tensor { - // CHECK: "vhlo.constant_v1"() <{ - // CHECK-SAME: value = #vhlo.tensor_v1 : tensor> -@@ -1023,15 +1100,17 @@ - } - - // CHECK-LABEL: "op_convert" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_convert(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.convert"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_convolution" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x7x7x16xf32> { -- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -1065,8 +1144,9 @@ - } - - // CHECK-LABEL: "op_cosine" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cosine(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cosine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.cosine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cosine"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } -@@ -1079,8 +1159,9 @@ - } - - // CHECK-LABEL: "op_cross_replica_sum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cross_replica_sum(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cross-replica-sum_v1"(%arg0) <{ -+ // CHECK: "vhlo.cross-replica-sum_v1"(%[[ARG0]]) <{ - // 
CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cross-replica-sum"(%arg0) { -@@ -1090,8 +1171,9 @@ - } - - // CHECK-LABEL: "op_custom_call" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_custom_call(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.custom_call_v1"(%arg0) <{ -+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{ - // CHECK-SAME: api_version = #vhlo, - // CHECK-SAME: backend_config = #vhlo.string_v1<"\08\03\1A\02">, - // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">, -@@ -1122,15 +1204,17 @@ - } - - // CHECK-LABEL: "op_divide" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_divide(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.divide_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.divide_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.divide"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dot_general" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> { -- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, -@@ -1150,8 +1234,9 @@ - } - - // CHECK-LABEL: "op_dot" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.dot"(%arg0, %arg1) { -@@ -1161,8 +1246,9 @@ - } - - // CHECK-LABEL: "op_dynamic_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1176,8 +1262,9 @@ - } - - // CHECK-LABEL: "op_dynamic_conv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> { -- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -1211,8 +1298,9 @@ - } - - // CHECK-LABEL: "op_dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - 
func.func @op_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> { -- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -1232,8 +1320,9 @@ - } - - // CHECK-LABEL: "op_dynamic_iota" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_dynamic_iota(%arg0: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_iota_v1"(%arg0) <{ -+ // CHECK: "vhlo.dynamic_iota_v1"(%[[ARG0]]) <{ - // CHECK-SAME: iota_dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_iota"(%arg0) { -@@ -1243,22 +1332,25 @@ - } - - // CHECK-LABEL: "op_dynamic_pad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_dynamic_pad(%arg0: tensor, %arg1: tensor, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>, %arg4: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_pad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.dynamic_pad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_pad"(%arg0, %arg1, %arg2, %arg3, %arg4) : (tensor, tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dynamic_reshape" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_reshape(%arg0: tensor<16xf32>, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_reshape_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.dynamic_reshape_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_reshape"(%arg0, %arg1) : (tensor<16xf32>, tensor<2xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dynamic_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_slice(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor<4xf32> { -- // CHECK: "vhlo.dynamic_slice_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_slice_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: slice_sizes = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<4x!vhlo.f32_v1> - %0 = "stablehlo.dynamic_slice"(%arg0, %arg1) { -@@ -1268,15 +1360,17 @@ - } - - // CHECK-LABEL: "op_dynamic_update_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_update_slice(%arg0: tensor<16xf32>, %arg1: tensor<4xf32>, %arg2: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.dynamic_update_slice_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> -+ // CHECK: "vhlo.dynamic_update_slice_v1"(%[[ARG0]], 
-+ // CHECK: "vhlo.dynamic_update_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
- %0 = "stablehlo.dynamic_update_slice"(%arg0, %arg1, %arg2) : (tensor<16xf32>, tensor<4xf32>, tensor) -> tensor<16xf32>
- func.return %0 : tensor<16xf32>
- }
-
- // CHECK-LABEL: "op_einsum"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_einsum(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
-- // CHECK: "vhlo.einsum_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.einsum_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab,bc->ac">
- // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1>
- %0 = "stablehlo.einsum"(%arg0, %arg1) {
-@@ -1286,22 +1380,25 @@
- }
-
- // CHECK-LABEL: "op_exponential_minus_one"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_exponential_minus_one(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.exponential_minus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.exponential_minus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.exponential_minus_one"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_exponential"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_exponential(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.exponential_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.exponential_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.exponential"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_fft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> {
-- // CHECK: "vhlo.fft_v1"(%arg0) <{
-+ // CHECK: "vhlo.fft_v1"(%[[ARG0]]) <{
- // CHECK-SAME: fft_length = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: fft_type = #vhlo
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.complex_v1>) -> !vhlo.tensor_v1<16x!vhlo.complex_v1>
-@@ -1313,8 +1410,9 @@
- }
-
- // CHECK-LABEL: "op_floor"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_floor(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.floor_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.floor_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.floor"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-@@ -1327,16 +1425,17 @@
- // CHECK-SAME: sym_name = #vhlo.string_v1<"op_func">,
- // CHECK-SAME: sym_visibility = #vhlo.string_v1<"private">
- // CHECK-SAME: }> ({
-- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1):
-- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> ()
-+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1):
-+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }) : () -> ()
-
- func.return %arg0 : tensor
- }
-
- // CHECK-LABEL: "op_gather"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> {
-- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
- // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>,
- // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
-@@ -1358,8 +1457,9 @@
- }
-
- // CHECK-LABEL: "op_get_dimension_size"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_get_dimension_size(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.get_dimension_size_v1"(%arg0) <{
-+ // CHECK: "vhlo.get_dimension_size_v1"(%[[ARG0]]) <{
- // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.get_dimension_size"(%arg0) {
-@@ -1369,8 +1469,9 @@
- }
-
- // CHECK-LABEL: "op_get_tuple_element"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_get_tuple_element(%arg0: tuple, tensor>) -> tensor {
-- // CHECK: "vhlo.get_tuple_element_v1"(%arg0) <{
-+ // CHECK: "vhlo.get_tuple_element_v1"(%[[ARG0]]) <{
- // CHECK-SAME: index = #vhlo.integer_v1<0 : i32>
- // CHECK-SAME: }> : (!vhlo.tuple_v1, !vhlo.tensor_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.get_tuple_element"(%arg0) {
-@@ -1380,11 +1481,12 @@
- }
-
- // CHECK-LABEL: "op_if"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_if(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor {
-- // CHECK: "vhlo.if_v1"(%arg0) ({
-- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> ()
-+ // CHECK: "vhlo.if_v1"(%[[ARG0]]) ({
-+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }, {
-- // CHECK-NEXT: "vhlo.return_v1"(%arg2) : (!vhlo.tensor_v1) -> ()
-+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG2]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.if"(%arg0) ({
- "stablehlo.return"(%arg1) : (tensor) -> ()
-@@ -1395,15 +1497,17 @@
- }
-
- // CHECK-LABEL: "op_imag"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_imag(%arg0: tensor>) -> tensor {
-- // CHECK: "vhlo.imag_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.imag_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.imag"(%arg0) : (tensor>) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_infeed"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) {
-- // CHECK: "vhlo.infeed_v1"(%arg0) <{
-+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{
- // CHECK-SAME: infeed_config = #vhlo.string_v1<"foo">,
- // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[#vhlo.array_v1<[]>]>
- // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1)
-@@ -1426,36 +1530,41 @@
- }
-
- // CHECK-LABEL: "op_is_finite"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_is_finite(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.is_finite_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.is_finite_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.is_finite"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_log"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_log(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.log_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.log_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.log"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_log_plus_one"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_log_plus_one(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.log_plus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.log_plus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.log_plus_one"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_logistic"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_logistic(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.logistic_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.logistic_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.logistic"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_map"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_map(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.map_v1"(%arg0) <{
-+ // CHECK: "vhlo.map_v1"(%[[ARG0]]) <{
- // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>>
- // CHECK-SAME: }> ({
- // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1):
-@@ -1473,57 +1582,65 @@
- }
-
- // CHECK-LABEL: "op_maximum"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_maximum(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.maximum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.maximum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.maximum"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_minimum"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_minimum(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.minimum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.minimum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.minimum"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_multiply"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_multiply(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.multiply_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.multiply_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.multiply"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_negate"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_negate(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.negate_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.negate_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.negate"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_not"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_not(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.not_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.not_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.not"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_optimization_barrier"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_optimization_barrier(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.optimization_barrier_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.optimization_barrier_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.optimization_barrier"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_or"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_or(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.or_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.or_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.or"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_outfeed"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: outfeed_config = #vhlo.string_v1<"foo">
- // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1
- %0 = "stablehlo.outfeed"(%arg0, %arg1) {
-@@ -1533,8 +1650,9 @@
- }
-
- // CHECK-LABEL: "op_pad"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_pad(%arg0: tensor<8xf32>, %arg1: tensor) -> tensor<16xf32> {
-- // CHECK: "vhlo.pad_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.pad_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: edge_padding_high = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: edge_padding_low = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: interior_padding = #vhlo.tensor_v1 : tensor<1xi64>>
-@@ -1548,36 +1666,41 @@
- }
-
- // CHECK-LABEL: "op_popcnt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_popcnt(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.popcnt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.popcnt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.popcnt"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_power"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_power(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.power_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.power_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.power"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_real_dynamic_slice"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}})
- func.func @op_real_dynamic_slice(%arg0: tensor, %arg1: tensor<1xindex>, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>) -> tensor {
-- // CHECK: "vhlo.real_dynamic_slice_v1"(%arg0, %arg1, %arg2, %arg3) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.real_dynamic_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.real_dynamic_slice"(%arg0, %arg1, %arg2, %arg3) : (tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_real"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_real(%arg0: tensor>) -> tensor {
-- // CHECK: "vhlo.real_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.real_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.real"(%arg0) : (tensor>) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_recv"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) {
-- // CHECK: "vhlo.recv_v1"(%arg0) <{
-+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME: channel_type = #vhlo.integer_v1<3 : i64>,
- // CHECK-SAME: is_host_transfer = #vhlo.bool_v1
-@@ -1590,8 +1713,9 @@
- }
-
- // CHECK-LABEL: "op_reduce"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_reduce(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.reduce_v1"(%arg0, %arg1)
-+ // CHECK: "vhlo.reduce_v1"(%[[ARG0]], %[[ARG1]])
- // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1):
- // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> ()
- // CHECK: }) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-@@ -1606,8 +1730,9 @@
- }
-
- // CHECK-LABEL: "op_reduce_precision"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_reduce_precision(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.reduce_precision_v1"(%arg0) <{
-+ // CHECK: "vhlo.reduce_precision_v1"(%[[ARG0]]) <{
- // CHECK-SAME: exponent_bits = #vhlo.integer_v1<8 : i32>
- // CHECK-SAME: mantissa_bits = #vhlo.integer_v1<10 : i32>
- // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-@@ -1619,8 +1744,9 @@
- }
-
- // CHECK-LABEL: "op_reduce_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{
-+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
- // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64>
-@@ -1644,8 +1770,9 @@
- }
-
- // CHECK-LABEL: "op_reduce_window"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x9x16x7xf32> {
-- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>,
- // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>,
- // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>,
-@@ -1671,8 +1798,9 @@
- }
-
- // CHECK-LABEL: "op_remainder"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_remainder(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.remainder_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.remainder_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.remainder"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-@@ -1692,16 +1820,18 @@
- }
-
- // CHECK-LABEL: "op_reshape"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_reshape(%arg0: tensor<16xf32>) -> tensor<4x4xf32> {
-- // CHECK: "vhlo.reshape_v1"(%arg0) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1>
-+ // CHECK: "vhlo.reshape_v1"(%[[ARG0]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1>
- %0 = "stablehlo.reshape"(%arg0) : (tensor<16xf32>) -> tensor<4x4xf32>
- func.return %0 : tensor<4x4xf32>
- }
-
- // CHECK-LABEL: "op_return"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_return(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.case_v1"(%arg0) ({
-- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> ()
-+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({
-+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.case"(%arg0) ({
- "stablehlo.return"(%arg1) : (tensor) -> ()
-@@ -1710,8 +1840,9 @@
- }
-
- // CHECK-LABEL: "op_reverse"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_reverse(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.reverse_v1"(%arg0) <{
-+ // CHECK: "vhlo.reverse_v1"(%[[ARG0]]) <{
- // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
- %0 = "stablehlo.reverse"(%arg0) {
-@@ -1721,8 +1852,9 @@
- }
-
- // CHECK-LABEL: "op_rng_bit_generator"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_rng_bit_generator(%arg0: tensor) -> (tensor, tensor) {
-- // CHECK: "vhlo.rng_bit_generator_v1"(%arg0) <{
-+ // CHECK: "vhlo.rng_bit_generator_v1"(%[[ARG0]]) <{
- // CHECK-SAME: rng_algorithm = #vhlo
- // CHECK-SAME: }> : (!vhlo.tensor_v1) -> (!vhlo.tensor_v1, !vhlo.tensor_v1)
- %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
-@@ -1732,8 +1864,9 @@
- }
-
- // CHECK-LABEL: "op_rng"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_rng(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor {
-- // CHECK: "vhlo.rng_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.rng_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: rng_distribution = #vhlo
- // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<0x!vhlo.index_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) {
-@@ -1743,29 +1876,33 @@
- }
-
- // CHECK-LABEL: "op_round_nearest_afz"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_round_nearest_afz(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.round_nearest_afz_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.round_nearest_afz_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.round_nearest_afz"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_round_nearest_even"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_round_nearest_even(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.round_nearest_even_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.round_nearest_even_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.round_nearest_even"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_rsqrt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_rsqrt(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.rsqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.rsqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.rsqrt"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> {
-- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
- // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
-@@ -1795,8 +1932,9 @@
- }
-
- // CHECK-LABEL: "op_select_and_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<12x13x13x66xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> {
-- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>,
- // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>,
- // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>>
-@@ -1826,15 +1964,17 @@
- }
-
- // CHECK-LABEL: "op_select"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_select(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor {
-- // CHECK: "vhlo.select_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.select_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.select"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_send"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME: channel_type = #vhlo.integer_v1<2 : i64>,
- // CHECK-SAME: is_host_transfer = #vhlo.bool_v1
-@@ -1847,8 +1987,9 @@
- }
-
- // CHECK-LABEL: "op_set_dimension_size"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_set_dimension_size(%arg0: tensor, %arg1: tensor) -> tensor<16xf32> {
-- // CHECK: "vhlo.set_dimension_size_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.set_dimension_size_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
- %0 = "stablehlo.set_dimension_size"(%arg0, %arg1) {
-@@ -1858,43 +1999,49 @@
- }
-
- // CHECK-LABEL: "op_shift_left"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_shift_left(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.shift_left_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.shift_left_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.shift_left"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_shift_right_arithmetic"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_shift_right_arithmetic(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.shift_right_arithmetic_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.shift_right_arithmetic_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.shift_right_arithmetic"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_shift_right_logical"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_shift_right_logical(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.shift_right_logical_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.shift_right_logical_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.shift_right_logical"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_sign"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sign(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.sign_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.sign_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.sign"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_sine"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sine(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.sine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.sine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.sine"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_slice"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_slice(%arg0: tensor<16xf32>) -> tensor<4xf32> {
-- // CHECK: "vhlo.slice_v1"(%arg0) <{
-+ // CHECK: "vhlo.slice_v1"(%[[ARG0]]) <{
- // CHECK-SAME: limit_indices = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: start_indices = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: strides = #vhlo.tensor_v1 : tensor<1xi64>>
-@@ -1908,8 +2055,9 @@
- }
-
- // CHECK-LABEL: "op_sort"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.sort_v1"(%arg0) <{
-+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{
- // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: is_stable = #vhlo.bool_v1
- // CHECK-SAME: }> ({
-@@ -1929,29 +2077,33 @@
- }
-
- // CHECK-LABEL: "op_sqrt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sqrt(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.sqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.sqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.sqrt"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_subtract"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_subtract(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.subtract_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.subtract_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.subtract"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_tanh"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_tanh(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.tanh_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.tanh_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.tanh"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_torch_index_select"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_torch_index_select(%arg0: tensor<5x1x5xf32>, %arg1: tensor<2xi32>) -> tensor<2x1x5xf32> {
-- // CHECK: "vhlo.torch_index_select_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.torch_index_select_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: batch_dims = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: dim = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<5x1x5x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.i32_v1>) -> !vhlo.tensor_v1<2x1x5x!vhlo.f32_v1>
-@@ -1963,8 +2115,9 @@
- }
-
- // CHECK-LABEL: "op_trace"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_trace(%arg0: tensor) {
-- // CHECK: "vhlo.trace_v1"(%arg0) <{
-+ // CHECK: "vhlo.trace_v1"(%[[ARG0]]) <{
- // CHECK-SAME: tag = #vhlo.string_v1<"foo">
- // CHECK-SAME: }> : (!vhlo.tensor_v1) -> ()
- "stablehlo.trace"(%arg0) {
-@@ -1974,8 +2127,9 @@
- }
-
- // CHECK-LABEL: "op_transpose"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_transpose(%arg0: tensor<16x8xf32>) -> tensor<8x16xf32> {
-- // CHECK: "vhlo.transpose_v1"(%arg0) <{
-+ // CHECK: "vhlo.transpose_v1"(%[[ARG0]]) <{
- // CHECK-SAME: permutation = #vhlo.tensor_v1 : tensor<2xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x16x!vhlo.f32_v1>
- %0 = "stablehlo.transpose"(%arg0) {
-@@ -1985,8 +2139,9 @@
- }
-
- // CHECK-LABEL: "op_triangular_solve"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_triangular_solve(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
-- // CHECK: "vhlo.triangular_solve_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.triangular_solve_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: left_side = #vhlo.bool_v1,
- // CHECK-SAME: lower = #vhlo.bool_v1,
- // CHECK-SAME: transpose_a = #vhlo,
-@@ -2002,15 +2157,17 @@
- }
-
- // CHECK-LABEL: "op_tuple"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_tuple(%arg0: tensor) -> tuple> {
-- // CHECK: "vhlo.tuple_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1>
-+ // CHECK: "vhlo.tuple_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1>
- %0 = "stablehlo.tuple"(%arg0) : (tensor) -> tuple>
- func.return %0 : tuple>
- }
-
- // CHECK-LABEL: "op_unary_einsum"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_unary_einsum(%arg0: tensor<8x16xf32>) -> tensor<8xf32> {
-- // CHECK: "vhlo.unary_einsum_v1"(%arg0) <{
-+ // CHECK: "vhlo.unary_einsum_v1"(%[[ARG0]]) <{
- // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab->a">
- // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x!vhlo.f32_v1>
- %0 = "stablehlo.unary_einsum"(%arg0) {
-@@ -2020,22 +2177,25 @@
- }
-
- // CHECK-LABEL: "op_uniform_dequantize"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_uniform_dequantize(%arg0: tensor>) -> tensor {
-- // CHECK: "vhlo.uniform_dequantize_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.uniform_dequantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.uniform_dequantize"(%arg0) : (tensor>) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_uniform_quantize"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_uniform_quantize(%arg0: tensor) -> tensor> {
-- // CHECK: "vhlo.uniform_quantize_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1>
-+ // CHECK: "vhlo.uniform_quantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1>
- %0 = "stablehlo.uniform_quantize"(%arg0) : (tensor) -> tensor>
- func.return %0 : tensor>
- }
-
- // CHECK-LABEL: "op_while"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_while(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.while_v1"(%arg0) ({
-+ // CHECK: "vhlo.while_v1"(%[[ARG0]]) ({
- // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1):
- // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }, {
-@@ -2053,8 +2213,9 @@
- }
-
- // CHECK-LABEL: "op_xor"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_xor(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.xor_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.xor_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.xor"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-@@ -2062,190 +2223,217 @@
- // ============ TYPES ============
-
- // CHECK-LABEL: "type_i1"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i1(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i4"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i4(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i8"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i8(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i16(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i32(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i64(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_ui4"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui4(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_ui8"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui8(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
"vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui32(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui64(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E4M3FN" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E4M3FN(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E5M2" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E5M2(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E4M3FNUZ" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E4M3FNUZ(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E4M3B11FNUZ" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E4M3B11FNUZ(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E5M2FNUZ" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func 
@type_f8E5M2FNUZ(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_bf16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_bf16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f32(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f64(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_complex_f32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_complex_f32(%arg0: tensor>, %arg1: tensor>) -> tensor> { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "type_complex_f64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_complex_f64(%arg0: tensor>, %arg1: tensor>) -> tensor> { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "type_dynamism_ranked" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_dynamism_ranked(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: 
"type_quantization" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_quantization(%arg0: tensor>, %arg1: tensor>) -> tensor> { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor> - func.return %0 : tensor> - } - - // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>> - // CHECK-LABEL: "type_token_callee" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_token_callee(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.token_v1) -> () -+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> () - return %arg0 : !stablehlo.token - } - - // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>> - // CHECK-LABEL: "type_token_caller" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_token_caller(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.call_v1"(%arg0) <{callee = #vhlo.string_v1<"type_token_callee">} -+ // CHECK: "vhlo.call_v1"(%[[ARG0]]) <{callee = #vhlo.string_v1<"type_token_callee">} - // CHECK-SAME: (!vhlo.token_v1) -> !vhlo.token_v1 - %0 = func.call @type_token_callee(%arg0) : (!stablehlo.token) -> !stablehlo.token - return %0 : !stablehlo.token - } - - // CHECK-LABEL: "type_tuple" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_tuple(%arg0: tuple>) -> tuple { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo" -diff --ruN a/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_13_0.mlir b/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_13_0.mlir ---- stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_13_0.mlir -+++ stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_13_0.mlir -@@ -13,6 +13,7 @@ - // ============ ATTRIBUTES ============ - - // CHECK-LABEL: "attr_comparison_direction_eq" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_eq(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -22,6 +23,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_ne" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_ne(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -31,6 +33,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_ge" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_ge(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -40,6 +43,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_gt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_gt(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -49,6 +53,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_le" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_le(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -58,6 +63,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_lt" -+// CHECK-NEXT: 
(%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_lt(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -67,6 +73,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_notype" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_notype(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo -@@ -76,6 +83,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_float" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_float(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -86,6 +94,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_totalorder" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_totalorder(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -96,6 +105,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_signed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_signed(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -106,6 +116,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_unsigned" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_unsigned(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -118,6 +129,7 @@ - // ConvDimensionNumbers aka #stablehlo.conv is covered below. - - // CHECK-LABEL: "attr_custom_call_api_version_unspecified" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_unspecified(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -128,6 +140,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_original" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_original(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -138,6 +151,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_status_returning" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_status_returning(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -148,6 +162,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_status_returning_unified" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_status_returning_unified(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -160,6 +175,7 @@ - // DotDimensionNumbers aka #stablehlo.dot is covered below. 
-
- // CHECK-LABEL: "attr_fft_type_fft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -170,6 +186,7 @@
- }
-
- // CHECK-LABEL: "attr_fft_type_ifft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_ifft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -180,6 +197,7 @@
- }
-
- // CHECK-LABEL: "attr_fft_type_rfft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_rfft(%arg0: tensor<16xf32>) -> tensor<9xcomplex> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -190,6 +208,7 @@
- }
-
- // CHECK-LABEL: "attr_fft_type_irfft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_irfft(%arg0: tensor<9xcomplex>) -> tensor<16xf32> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -202,6 +221,7 @@
- // GatherDimensionNumbers aka #stablehlo.gather is covered below.
-
- // CHECK-LABEL: "attr_precision_config_default"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_precision_config_default(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
- %0 = "stablehlo.dot"(%arg0, %arg1) {
- // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
-@@ -210,6 +230,7 @@
- }
-
- // CHECK-LABEL: "attr_precision_config_high"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_precision_config_high(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
- %0 = "stablehlo.dot"(%arg0, %arg1) {
- // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
-@@ -219,6 +240,7 @@
- }
-
- // CHECK-LABEL: "attr_precision_config_highest"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_precision_config_highest(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
- %0 = "stablehlo.dot"(%arg0, %arg1) {
- // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
-@@ -228,6 +250,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_algorithm_default"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_rng_algorithm_default(%arg0: tensor) -> (tensor, tensor) {
- %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
- // CHECK: rng_algorithm = #vhlo
-@@ -237,6 +260,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_algorithm_three_fry"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_rng_algorithm_three_fry(%arg0: tensor) -> (tensor, tensor) {
- %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
- // CHECK: rng_algorithm = #vhlo
-@@ -246,6 +270,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_algorithm_philox"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_rng_algorithm_philox(%arg0: tensor) -> (tensor, tensor) {
- %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
- // CHECK: rng_algorithm = #vhlo
-@@ -255,6 +280,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_distribution_uniform"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @attr_rng_distribution_uniform(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor {
- %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) {
- // CHECK: rng_distribution = #vhlo
-@@ -264,6 +290,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_distribution_normal"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @attr_rng_distribution_normal(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor {
- %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) {
- // CHECK: rng_distribution = #vhlo
-@@ -275,6 +302,7 @@
- // ScatterDimensionNumbers aka #stablehlo.scatter is covered below.
-
- // CHECK-LABEL: "attr_transpose_no_transpose"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_transpose_no_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
- %0 = "stablehlo.triangular_solve"(%arg0, %arg1) {
- left_side = true,
-@@ -287,6 +315,7 @@
- }
-
- // CHECK-LABEL: "attr_transpose_transpose"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_transpose_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
- %0 = "stablehlo.triangular_solve"(%arg0, %arg1) {
- left_side = true,
-@@ -299,6 +328,7 @@
- }
-
- // CHECK-LABEL: "attr_transpose_adjoint"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_transpose_adjoint(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
- %0 = "stablehlo.triangular_solve"(%arg0, %arg1) {
- left_side = true,
-@@ -313,10 +343,9 @@
- // TypeExtensionsAttr aka #stablehlo.type_extensions is covered below.
-
- // CHECK-LABEL: "attr_type_extensions_bounds"
--func.func @attr_type_extensions_bounds(
-- %arg0: tensor>)
-- -> tensor> {
-- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1>) -> ()
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
-+func.func @attr_type_extensions_bounds(%arg0: tensor>) -> tensor> {
-+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> ()
- func.return %arg0 : tensor>
- }
-
-@@ -324,8 +353,9 @@
- // ============ DEFAULTS ============
-
- // CHECK-LABEL: "default_all_gather"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> {
-- // CHECK: "vhlo.all_gather_v1"(%arg0) <{
-+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{
- // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64>
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
-@@ -339,8 +369,9 @@
- }
-
- // CHECK-LABEL: "default_all_reduce"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_all_reduce(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.all_reduce_v1"(%arg0)
-+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]])
- // CHECK-SAME: <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
-@@ -362,8 +393,9 @@
- }
-
- // CHECK-LABEL: "default_all_to_all"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> {
-- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{
-+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>,
-@@ -380,8 +412,9 @@
- }
-
- // CHECK-LABEL: "default_cholesky"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> {
-- // CHECK: "vhlo.cholesky_v1"(%arg0) <{
-+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{
- // CHECK-SAME: lower = #vhlo.bool_v1
- // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>
- %0 = "stablehlo.cholesky"(%arg0) : (tensor<1x16x16xf32>) -> tensor<1x16x16xf32>
-@@ -389,8 +422,9 @@
- }
-
"default_collective_permute" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: "vhlo.collective_permute_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -401,8 +435,9 @@ - } - - // CHECK-LABEL: "default_compare" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_compare(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: compare_type = #vhlo, - // CHECK-SAME: comparison_direction = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -413,8 +448,9 @@ - } - - // CHECK-LABEL: "default_convolution" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x6x6x16xf32> { -- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -442,8 +478,9 @@ - } - - // CHECK-LABEL: "default_custom_call" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_custom_call(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.custom_call_v1"(%arg0) <{ -+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{ - // CHECK-SAME: api_version = #vhlo, - // CHECK-SAME: backend_config = #vhlo.string_v1<"">, - // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">, -@@ -460,8 +497,9 @@ - } - - // CHECK-LABEL: "default_dot_general" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> { -- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, -@@ -480,8 +518,9 @@ - } - - // CHECK-LABEL: "default_dot" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.dot"(%arg0, %arg1) : (tensor<8x16xf32>, tensor<16x8xf32>) -> tensor<8x8xf32> -@@ -489,8 +528,9 @@ - } - - // CHECK-LABEL: "default_dynamic_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : 
tensor<2xi64>>, - // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>>, - // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>> -@@ -502,8 +542,9 @@ - } - - // CHECK-LABEL: "default_dynamic_conv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> { -- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -531,8 +572,9 @@ - } - - // CHECK-LABEL: "default_dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> { -- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -558,15 +600,16 @@ - // CHECK-SAME: sym_name = #vhlo.string_v1<"default_func">, - // CHECK-SAME: sym_visibility = #vhlo.string_v1<""> - // CHECK-SAME: }> ({ -- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1): -- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1): -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : () -> () - func.return %arg0 : tensor - } - - // CHECK-LABEL: "dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> { -- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -587,8 +630,9 @@ - } - - // CHECK-LABEL: "default_infeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.infeed_v1"(%arg0) <{ -+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{ - // CHECK-SAME: infeed_config = #vhlo.string_v1<"">, - // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[]> - // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) -@@ -597,8 +641,9 @@ - } - - // CHECK-LABEL: "default_outfeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: outfeed_config = #vhlo.string_v1<""> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.outfeed"(%arg0, %arg1) : (tensor, !stablehlo.token) -> !stablehlo.token -@@ -606,8 +651,9 @@ - } - - // CHECK-LABEL: "default_recv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_recv(%arg0: !stablehlo.token) -> (tensor, 
!stablehlo.token) { -- // CHECK: "vhlo.recv_v1"(%arg0) <{ -+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -619,8 +665,9 @@ - } - - // CHECK-LABEL: "default_send" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -632,8 +679,9 @@ - } - - // CHECK-LABEL: "default_reduce_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64> -@@ -655,8 +703,9 @@ - } - - // CHECK-LABEL: "default_reduce_window" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x16x30x7xf32> { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, -@@ -678,8 +727,9 @@ - } - - // CHECK-LABEL: "default_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> { -- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, - // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>, -@@ -707,8 +757,9 @@ - } - - // CHECK-LABEL: "default_select_and_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<10x23x23x64xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> { -- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>> -@@ -736,8 +787,9 @@ - } - - // CHECK-LABEL: "default_sort" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.sort_v1"(%arg0) <{ -+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<-1 : i64> - // CHECK-SAME: is_stable = #vhlo.bool_v1 - // CHECK-SAME: }> ({ -@@ -756,29 +808,33 @@ - // ============ OPS ============ - - // CHECK-LABEL: "op_abs" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func 
@op_abs(%arg0: tensor<f32>) -> tensor<f32> {
--  // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1<!vhlo.f32_v1>) -> !vhlo.tensor_v1<!vhlo.f32_v1>
-+  // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1<!vhlo.f32_v1>) -> !vhlo.tensor_v1<!vhlo.f32_v1>
-   %0 = "stablehlo.abs"(%arg0) : (tensor<f32>) -> tensor<f32>
-   func.return %0 : tensor<f32>
- }
-
- // CHECK-LABEL: "op_add"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_add(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<f32> {
--  // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f32_v1>, !vhlo.tensor_v1<!vhlo.f32_v1>) -> !vhlo.tensor_v1<!vhlo.f32_v1>
-+  // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f32_v1>, !vhlo.tensor_v1<!vhlo.f32_v1>) -> !vhlo.tensor_v1<!vhlo.f32_v1>
-   %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
-   func.return %0 : tensor<f32>
- }
-
- // CHECK-LABEL: "op_after_all"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_after_all(%arg0: !stablehlo.token) -> !stablehlo.token {
--  // CHECK: "vhlo.after_all_v1"(%arg0) : (!vhlo.token_v1) -> !vhlo.token_v1
-+  // CHECK: "vhlo.after_all_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> !vhlo.token_v1
-   %0 = "stablehlo.after_all"(%arg0) : (!stablehlo.token) -> !stablehlo.token
-   func.return %0 : !stablehlo.token
- }
-
- // CHECK-LABEL: "op_all_gather"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> {
--  // CHECK: "vhlo.all_gather_v1"(%arg0) <{
-+  // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{
-   // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64>
-   // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>,
-   // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1<dense<[[0], [1]]> : tensor<2x1xi64>>,
-@@ -794,8 +850,9 @@
- }
-
- // CHECK-LABEL: "op_all_reduce"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_all_reduce(%arg0: tensor<f32>) -> tensor<f32> {
--  // CHECK: "vhlo.all_reduce_v1"(%arg0) <{
-+  // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]]) <{
-   // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>,
-   // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1<dense<[[0], [1]]> : tensor<2x1xi64>>,
-   // CHECK-SAME: use_global_device_ids = #vhlo.bool_v1<true>
-@@ -817,8 +874,9 @@
- }
-
- // CHECK-LABEL: "op_all_to_all"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> {
--  // CHECK: "vhlo.all_to_all_v1"(%arg0) <{
-+  // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{
-   // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>,
-   // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>,
-   // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1<dense<[[0, 1, 2, 3]]> : tensor<1x4xi64>>,
-@@ -836,22 +894,25 @@
- }
-
- // CHECK-LABEL: "op_and"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_and(%arg0: tensor<i1>, %arg1: tensor<i1>) -> tensor<i1> {
--  // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.bool_v1>, !vhlo.tensor_v1<!vhlo.bool_v1>) -> !vhlo.tensor_v1<!vhlo.bool_v1>
-+  // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.bool_v1>, !vhlo.tensor_v1<!vhlo.bool_v1>) -> !vhlo.tensor_v1<!vhlo.bool_v1>
-   %0 = "stablehlo.and"(%arg0, %arg1) : (tensor<i1>, tensor<i1>) -> tensor<i1>
-   func.return %0 : tensor<i1>
- }
-
- // CHECK-LABEL: "op_atan2"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_atan2(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<f32> {
--  // CHECK: "vhlo.atan2_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f32_v1>, !vhlo.tensor_v1<!vhlo.f32_v1>) -> !vhlo.tensor_v1<!vhlo.f32_v1>
-+  // CHECK: "vhlo.atan2_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f32_v1>, !vhlo.tensor_v1<!vhlo.f32_v1>) -> !vhlo.tensor_v1<!vhlo.f32_v1>
-   %0 = "stablehlo.atan2"(%arg0, %arg1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
-   func.return %0 : tensor<f32>
- }
-
- // CHECK-LABEL: "op_batch_norm_grad"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}},
%[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_batch_norm_grad(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16x16x16x16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { -- // CHECK: "vhlo.batch_norm_grad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ -+ // CHECK: "vhlo.batch_norm_grad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -@@ -863,8 +924,9 @@ - } - - // CHECK-LABEL: "op_batch_norm_inference" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_batch_norm_inference(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16xf32>) -> tensor<16x16x16x16xf32> { -- // CHECK: "vhlo.batch_norm_inference_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ -+ // CHECK: "vhlo.batch_norm_inference_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1> -@@ -876,8 +938,9 @@ - } - - // CHECK-LABEL: "op_batch_norm_training" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_batch_norm_training(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { -- // CHECK: "vhlo.batch_norm_training_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.batch_norm_training_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -@@ -889,15 +952,17 @@ - } - - // CHECK-LABEL: "op_bitcast_convert" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_bitcast_convert(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.bitcast_convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.bitcast_convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.bitcast_convert"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_broadcast_in_dim(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.broadcast_in_dim_v1"(%arg0) <{ -+ // CHECK: "vhlo.broadcast_in_dim_v1"(%[[ARG0]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> 
!vhlo.tensor_v1<16x16x!vhlo.f32_v1> - %0 = "stablehlo.broadcast_in_dim"(%arg0) { -@@ -907,8 +972,9 @@ - } - - // CHECK-LABEL: "op_broadcast" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_broadcast(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.broadcast_v1"(%arg0) <{ -+ // CHECK: "vhlo.broadcast_v1"(%[[ARG0]]) <{ - // CHECK-SAME: broadcast_sizes = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1> - %0 = "stablehlo.broadcast"(%arg0) { -@@ -918,9 +984,10 @@ - } - - // CHECK-LABEL: "op_case" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_case(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.case_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.case"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -929,22 +996,25 @@ - } - - // CHECK-LABEL: "op_cbrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cbrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cbrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.cbrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cbrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_ceil" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_ceil(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.ceil_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.ceil_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.ceil"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_cholesky" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> { -- // CHECK: "vhlo.cholesky_v1"(%arg0) <{ -+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{ - // CHECK-SAME: lower = #vhlo.bool_v1 - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1> - %0 = "stablehlo.cholesky"(%arg0) { -@@ -954,22 +1024,25 @@ - } - - // CHECK-LABEL: "op_clamp" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_clamp(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.clamp_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.clamp_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.clamp"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_count_leading_zeros" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_count_leading_zeros(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.count_leading_zeros_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.count_leading_zeros_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.count_leading_zeros"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_collective_permute" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: "vhlo.collective_permute_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{ - // CHECK-SAME: 
channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -981,8 +1054,9 @@ - } - - // CHECK-LABEL: "op_compare" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_compare(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: compare_type = #vhlo, - // CHECK-SAME: comparison_direction = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -994,15 +1068,17 @@ - } - - // CHECK-LABEL: "op_complex" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_complex(%arg0: tensor, %arg1: tensor) -> tensor> { -- // CHECK: "vhlo.complex_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.complex_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1> - %0 = "stablehlo.complex"(%arg0, %arg1) : (tensor, tensor) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "op_concatenate" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_concatenate(%arg0: tensor<8xf32>, %arg1: tensor<8xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.concatenate_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.concatenate_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x!vhlo.f32_v1>, !vhlo.tensor_v1<8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.concatenate"(%arg0, %arg1) { -@@ -1012,6 +1088,7 @@ - } - - // CHECK-LABEL: "op_constant" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_constant(%arg0: tensor) -> tensor { - // CHECK: "vhlo.constant_v1"() <{ - // CHECK-SAME: value = #vhlo.tensor_v1 : tensor> -@@ -1023,15 +1100,17 @@ - } - - // CHECK-LABEL: "op_convert" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_convert(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.convert"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_convolution" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x7x7x16xf32> { -- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -1065,8 +1144,9 @@ - } - - // CHECK-LABEL: "op_cosine" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cosine(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cosine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.cosine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cosine"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } -@@ -1079,8 +1159,9 @@ - } - - // CHECK-LABEL: "op_cross_replica_sum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cross_replica_sum(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cross-replica-sum_v1"(%arg0) <{ -+ // CHECK: "vhlo.cross-replica-sum_v1"(%[[ARG0]]) <{ - // 
CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cross-replica-sum"(%arg0) { -@@ -1090,8 +1171,9 @@ - } - - // CHECK-LABEL: "op_custom_call" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_custom_call(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.custom_call_v1"(%arg0) <{ -+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{ - // CHECK-SAME: api_version = #vhlo, - // CHECK-SAME: backend_config = #vhlo.string_v1<"\08\03\1A\02">, - // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">, -@@ -1122,15 +1204,17 @@ - } - - // CHECK-LABEL: "op_divide" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_divide(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.divide_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.divide_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.divide"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dot_general" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> { -- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, -@@ -1150,8 +1234,9 @@ - } - - // CHECK-LABEL: "op_dot" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.dot"(%arg0, %arg1) { -@@ -1161,8 +1246,9 @@ - } - - // CHECK-LABEL: "op_dynamic_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1176,8 +1262,9 @@ - } - - // CHECK-LABEL: "op_dynamic_conv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> { -- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -1211,8 +1298,9 @@ - } - - // CHECK-LABEL: "op_dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - 
func.func @op_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> { -- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -1232,8 +1320,9 @@ - } - - // CHECK-LABEL: "op_dynamic_iota" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_dynamic_iota(%arg0: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_iota_v1"(%arg0) <{ -+ // CHECK: "vhlo.dynamic_iota_v1"(%[[ARG0]]) <{ - // CHECK-SAME: iota_dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_iota"(%arg0) { -@@ -1243,22 +1332,25 @@ - } - - // CHECK-LABEL: "op_dynamic_pad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_dynamic_pad(%arg0: tensor, %arg1: tensor, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>, %arg4: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_pad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.dynamic_pad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_pad"(%arg0, %arg1, %arg2, %arg3, %arg4) : (tensor, tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dynamic_reshape" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_reshape(%arg0: tensor<16xf32>, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_reshape_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.dynamic_reshape_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_reshape"(%arg0, %arg1) : (tensor<16xf32>, tensor<2xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dynamic_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_slice(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor<4xf32> { -- // CHECK: "vhlo.dynamic_slice_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_slice_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: slice_sizes = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<4x!vhlo.f32_v1> - %0 = "stablehlo.dynamic_slice"(%arg0, %arg1) { -@@ -1268,15 +1360,17 @@ - } - - // CHECK-LABEL: "op_dynamic_update_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_update_slice(%arg0: tensor<16xf32>, %arg1: tensor<4xf32>, %arg2: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.dynamic_update_slice_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> -+ // CHECK: "vhlo.dynamic_update_slice_v1"(%[[ARG0]], 
%[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.dynamic_update_slice"(%arg0, %arg1, %arg2) : (tensor<16xf32>, tensor<4xf32>, tensor) -> tensor<16xf32> - func.return %0 : tensor<16xf32> - } - - // CHECK-LABEL: "op_einsum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_einsum(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.einsum_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.einsum_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab,bc->ac"> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.einsum"(%arg0, %arg1) { -@@ -1286,22 +1380,25 @@ - } - - // CHECK-LABEL: "op_exponential_minus_one" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_exponential_minus_one(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.exponential_minus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.exponential_minus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.exponential_minus_one"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_exponential" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_exponential(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.exponential_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.exponential_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.exponential"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_fft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { -- // CHECK: "vhlo.fft_v1"(%arg0) <{ -+ // CHECK: "vhlo.fft_v1"(%[[ARG0]]) <{ - // CHECK-SAME: fft_length = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: fft_type = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.complex_v1>) -> !vhlo.tensor_v1<16x!vhlo.complex_v1> -@@ -1313,8 +1410,9 @@ - } - - // CHECK-LABEL: "op_floor" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_floor(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.floor_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.floor_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.floor"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } -@@ -1327,16 +1425,17 @@ - // CHECK-SAME: sym_name = #vhlo.string_v1<"op_func">, - // CHECK-SAME: sym_visibility = #vhlo.string_v1<"private"> - // CHECK-SAME: }> ({ -- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1): -- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1): -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : () -> () - - func.return %arg0 : tensor - } - - // CHECK-LABEL: "op_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> { -- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -1358,8 +1457,9 @@ - } - - // CHECK-LABEL: "op_get_dimension_size" -+// CHECK-NEXT: 
(%[[ARG0:.*]]: {{.*}}) - func.func @op_get_dimension_size(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.get_dimension_size_v1"(%arg0) <{ -+ // CHECK: "vhlo.get_dimension_size_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.get_dimension_size"(%arg0) { -@@ -1369,8 +1469,9 @@ - } - - // CHECK-LABEL: "op_get_tuple_element" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_get_tuple_element(%arg0: tuple, tensor>) -> tensor { -- // CHECK: "vhlo.get_tuple_element_v1"(%arg0) <{ -+ // CHECK: "vhlo.get_tuple_element_v1"(%[[ARG0]]) <{ - // CHECK-SAME: index = #vhlo.integer_v1<0 : i32> - // CHECK-SAME: }> : (!vhlo.tuple_v1, !vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.get_tuple_element"(%arg0) { -@@ -1380,11 +1481,12 @@ - } - - // CHECK-LABEL: "op_if" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_if(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.if_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.if_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }, { -- // CHECK-NEXT: "vhlo.return_v1"(%arg2) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG2]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.if"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -1395,15 +1497,17 @@ - } - - // CHECK-LABEL: "op_imag" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_imag(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.imag_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.imag_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.imag"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_infeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.infeed_v1"(%arg0) <{ -+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{ - // CHECK-SAME: infeed_config = #vhlo.string_v1<"foo">, - // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[#vhlo.array_v1<[]>]> - // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) -@@ -1426,36 +1530,41 @@ - } - - // CHECK-LABEL: "op_is_finite" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_is_finite(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.is_finite_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.is_finite_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.is_finite"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_log" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_log(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.log_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.log_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.log"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_log_plus_one" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_log_plus_one(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.log_plus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.log_plus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.log_plus_one"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_logistic" -+// 
CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_logistic(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.logistic_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.logistic_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.logistic"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_map" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_map(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.map_v1"(%arg0) <{ -+ // CHECK: "vhlo.map_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> ({ - // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1): -@@ -1473,57 +1582,65 @@ - } - - // CHECK-LABEL: "op_maximum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_maximum(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.maximum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.maximum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.maximum"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_minimum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_minimum(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.minimum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.minimum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.minimum"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_multiply" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_multiply(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.multiply_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.multiply_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.multiply"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_negate" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_negate(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.negate_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.negate_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.negate"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_not" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_not(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.not_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.not_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.not"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_optimization_barrier" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_optimization_barrier(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.optimization_barrier_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.optimization_barrier_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.optimization_barrier"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_or" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_or(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.or_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: 
"vhlo.or_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.or"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_outfeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: outfeed_config = #vhlo.string_v1<"foo"> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.outfeed"(%arg0, %arg1) { -@@ -1533,8 +1650,9 @@ - } - - // CHECK-LABEL: "op_pad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_pad(%arg0: tensor<8xf32>, %arg1: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.pad_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.pad_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: edge_padding_high = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: edge_padding_low = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: interior_padding = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1548,36 +1666,41 @@ - } - - // CHECK-LABEL: "op_popcnt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_popcnt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.popcnt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.popcnt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.popcnt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_power" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_power(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.power_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.power_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.power"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_real_dynamic_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}) - func.func @op_real_dynamic_slice(%arg0: tensor, %arg1: tensor<1xindex>, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.real_dynamic_slice_v1"(%arg0, %arg1, %arg2, %arg3) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.real_dynamic_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.real_dynamic_slice"(%arg0, %arg1, %arg2, %arg3) : (tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_real" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_real(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.real_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.real_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.real"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_recv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.recv_v1"(%arg0) <{ -+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: 
channel_type = #vhlo.integer_v1<3 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -1590,8 +1713,9 @@ - } - - // CHECK-LABEL: "op_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_reduce(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.reduce_v1"(%arg0, %arg1) -+ // CHECK: "vhlo.reduce_v1"(%[[ARG0]], %[[ARG1]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -1606,8 +1730,9 @@ - } - - // CHECK-LABEL: "op_reduce_precision" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reduce_precision(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.reduce_precision_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_precision_v1"(%[[ARG0]]) <{ - // CHECK-SAME: exponent_bits = #vhlo.integer_v1<8 : i32> - // CHECK-SAME: mantissa_bits = #vhlo.integer_v1<10 : i32> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -1619,8 +1744,9 @@ - } - - // CHECK-LABEL: "op_reduce_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64> -@@ -1644,8 +1770,9 @@ - } - - // CHECK-LABEL: "op_reduce_window" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x9x16x7xf32> { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, -@@ -1671,8 +1798,9 @@ - } - - // CHECK-LABEL: "op_remainder" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_remainder(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.remainder_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.remainder_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.remainder"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } -@@ -1692,16 +1820,18 @@ - } - - // CHECK-LABEL: "op_reshape" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reshape(%arg0: tensor<16xf32>) -> tensor<4x4xf32> { -- // CHECK: "vhlo.reshape_v1"(%arg0) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1> -+ // CHECK: "vhlo.reshape_v1"(%[[ARG0]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1> - %0 = "stablehlo.reshape"(%arg0) : (tensor<16xf32>) -> tensor<4x4xf32> - func.return %0 : tensor<4x4xf32> - } - - // CHECK-LABEL: "op_return" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_return(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.case_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) 
-> !vhlo.tensor_v1 - %0 = "stablehlo.case"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -1710,8 +1840,9 @@ - } - - // CHECK-LABEL: "op_reverse" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reverse(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reverse_v1"(%arg0) <{ -+ // CHECK: "vhlo.reverse_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.reverse"(%arg0) { -@@ -1721,8 +1852,9 @@ - } - - // CHECK-LABEL: "op_rng_bit_generator" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_rng_bit_generator(%arg0: tensor) -> (tensor, tensor) { -- // CHECK: "vhlo.rng_bit_generator_v1"(%arg0) <{ -+ // CHECK: "vhlo.rng_bit_generator_v1"(%[[ARG0]]) <{ - // CHECK-SAME: rng_algorithm = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> (!vhlo.tensor_v1, !vhlo.tensor_v1) - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { -@@ -1732,8 +1864,9 @@ - } - - // CHECK-LABEL: "op_rng" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_rng(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { -- // CHECK: "vhlo.rng_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.rng_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: rng_distribution = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<0x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) { -@@ -1743,29 +1876,33 @@ - } - - // CHECK-LABEL: "op_round_nearest_afz" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_round_nearest_afz(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.round_nearest_afz_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.round_nearest_afz_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.round_nearest_afz"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_round_nearest_even" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_round_nearest_even(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.round_nearest_even_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.round_nearest_even_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.round_nearest_even"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_rsqrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_rsqrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.rsqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.rsqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.rsqrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> { -- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, - // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>, -@@ -1795,8 +1932,9 @@ - } - - // CHECK-LABEL: "op_select_and_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: 
tensor<12x13x13x66xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> { -- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>> -@@ -1826,15 +1964,17 @@ - } - - // CHECK-LABEL: "op_select" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_select(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.select_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.select_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.select"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_send" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -1847,8 +1987,9 @@ - } - - // CHECK-LABEL: "op_set_dimension_size" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_set_dimension_size(%arg0: tensor, %arg1: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.set_dimension_size_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.set_dimension_size_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.set_dimension_size"(%arg0, %arg1) { -@@ -1858,43 +1999,49 @@ - } - - // CHECK-LABEL: "op_shift_left" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_left(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.shift_left_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_left_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_left"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_shift_right_arithmetic" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_right_arithmetic(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.shift_right_arithmetic_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_right_arithmetic_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_right_arithmetic"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_shift_right_logical" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_right_logical(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.shift_right_logical_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_right_logical_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_right_logical"(%arg0, %arg1) : (tensor, tensor) -> 
tensor<i32>
-   func.return %0 : tensor<i32>
- }
-
- // CHECK-LABEL: "op_sign"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sign(%arg0: tensor<f32>) -> tensor<f32> {
--  // CHECK: "vhlo.sign_v1"(%arg0) : (!vhlo.tensor_v1<!vhlo.f32_v1>) -> !vhlo.tensor_v1<!vhlo.f32_v1>
-+  // CHECK: "vhlo.sign_v1"(%[[ARG0]]) : (!vhlo.tensor_v1<!vhlo.f32_v1>) -> !vhlo.tensor_v1<!vhlo.f32_v1>
-   %0 = "stablehlo.sign"(%arg0) : (tensor<f32>) -> tensor<f32>
-   func.return %0 : tensor<f32>
- }
-
- // CHECK-LABEL: "op_sine"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sine(%arg0: tensor<f32>) -> tensor<f32> {
--  // CHECK: "vhlo.sine_v1"(%arg0) : (!vhlo.tensor_v1<!vhlo.f32_v1>) -> !vhlo.tensor_v1<!vhlo.f32_v1>
-+  // CHECK: "vhlo.sine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1<!vhlo.f32_v1>) -> !vhlo.tensor_v1<!vhlo.f32_v1>
-   %0 = "stablehlo.sine"(%arg0) : (tensor<f32>) -> tensor<f32>
-   func.return %0 : tensor<f32>
- }
-
- // CHECK-LABEL: "op_slice"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_slice(%arg0: tensor<16xf32>) -> tensor<4xf32> {
--  // CHECK: "vhlo.slice_v1"(%arg0) <{
-+  // CHECK: "vhlo.slice_v1"(%[[ARG0]]) <{
-   // CHECK-SAME: limit_indices = #vhlo.tensor_v1<dense<4> : tensor<1xi64>>,
-   // CHECK-SAME: start_indices = #vhlo.tensor_v1<dense<0> : tensor<1xi64>>,
-   // CHECK-SAME: strides = #vhlo.tensor_v1<dense<1> : tensor<1xi64>>
-@@ -1908,8 +2055,9 @@
- }
-
- // CHECK-LABEL: "op_sort"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> {
--  // CHECK: "vhlo.sort_v1"(%arg0) <{
-+  // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{
-   // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64>
-   // CHECK-SAME: is_stable = #vhlo.bool_v1<true>
-   // CHECK-SAME: }> ({
-@@ -1929,29 +2077,33 @@
- }
-
- // CHECK-LABEL: "op_sqrt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sqrt(%arg0: tensor<f32>) -> tensor<f32> {
--  // CHECK: "vhlo.sqrt_v1"(%arg0) : (!vhlo.tensor_v1<!vhlo.f32_v1>) -> !vhlo.tensor_v1<!vhlo.f32_v1>
-+  // CHECK: "vhlo.sqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1<!vhlo.f32_v1>) -> !vhlo.tensor_v1<!vhlo.f32_v1>
-   %0 = "stablehlo.sqrt"(%arg0) : (tensor<f32>) -> tensor<f32>
-   func.return %0 : tensor<f32>
- }
-
- // CHECK-LABEL: "op_subtract"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_subtract(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<f32> {
--  // CHECK: "vhlo.subtract_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f32_v1>, !vhlo.tensor_v1<!vhlo.f32_v1>) -> !vhlo.tensor_v1<!vhlo.f32_v1>
-+  // CHECK: "vhlo.subtract_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f32_v1>, !vhlo.tensor_v1<!vhlo.f32_v1>) -> !vhlo.tensor_v1<!vhlo.f32_v1>
-   %0 = "stablehlo.subtract"(%arg0, %arg1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
-   func.return %0 : tensor<f32>
- }
-
- // CHECK-LABEL: "op_tanh"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_tanh(%arg0: tensor<f32>) -> tensor<f32> {
--  // CHECK: "vhlo.tanh_v1"(%arg0) : (!vhlo.tensor_v1<!vhlo.f32_v1>) -> !vhlo.tensor_v1<!vhlo.f32_v1>
-+  // CHECK: "vhlo.tanh_v1"(%[[ARG0]]) : (!vhlo.tensor_v1<!vhlo.f32_v1>) -> !vhlo.tensor_v1<!vhlo.f32_v1>
-   %0 = "stablehlo.tanh"(%arg0) : (tensor<f32>) -> tensor<f32>
-   func.return %0 : tensor<f32>
- }
-
- // CHECK-LABEL: "op_torch_index_select"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_torch_index_select(%arg0: tensor<5x1x5xf32>, %arg1: tensor<2xi32>) -> tensor<2x1x5xf32> {
--  // CHECK: "vhlo.torch_index_select_v1"(%arg0, %arg1) <{
-+  // CHECK: "vhlo.torch_index_select_v1"(%[[ARG0]], %[[ARG1]]) <{
-   // CHECK-SAME: batch_dims = #vhlo.integer_v1<0 : i64>
-   // CHECK-SAME: dim = #vhlo.integer_v1<0 : i64>
-   // CHECK-SAME: }> : (!vhlo.tensor_v1<5x1x5x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.i32_v1>) -> !vhlo.tensor_v1<2x1x5x!vhlo.f32_v1>
-@@ -1963,8 +2115,9 @@
- }
-
- // CHECK-LABEL: "op_trace"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_trace(%arg0: tensor<f32>) {
--  // CHECK: "vhlo.trace_v1"(%arg0) <{
-+  // CHECK: "vhlo.trace_v1"(%[[ARG0]]) <{
-   // CHECK-SAME: tag = 
#vhlo.string_v1<"foo"> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> () - "stablehlo.trace"(%arg0) { -@@ -1974,8 +2127,9 @@ - } - - // CHECK-LABEL: "op_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_transpose(%arg0: tensor<16x8xf32>) -> tensor<8x16xf32> { -- // CHECK: "vhlo.transpose_v1"(%arg0) <{ -+ // CHECK: "vhlo.transpose_v1"(%[[ARG0]]) <{ - // CHECK-SAME: permutation = #vhlo.tensor_v1 : tensor<2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x16x!vhlo.f32_v1> - %0 = "stablehlo.transpose"(%arg0) { -@@ -1985,8 +2139,9 @@ - } - - // CHECK-LABEL: "op_triangular_solve" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_triangular_solve(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.triangular_solve_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.triangular_solve_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: left_side = #vhlo.bool_v1, - // CHECK-SAME: lower = #vhlo.bool_v1, - // CHECK-SAME: transpose_a = #vhlo, -@@ -2002,15 +2157,17 @@ - } - - // CHECK-LABEL: "op_tuple" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_tuple(%arg0: tensor) -> tuple> { -- // CHECK: "vhlo.tuple_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1> -+ // CHECK: "vhlo.tuple_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1> - %0 = "stablehlo.tuple"(%arg0) : (tensor) -> tuple> - func.return %0 : tuple> - } - - // CHECK-LABEL: "op_unary_einsum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_unary_einsum(%arg0: tensor<8x16xf32>) -> tensor<8xf32> { -- // CHECK: "vhlo.unary_einsum_v1"(%arg0) <{ -+ // CHECK: "vhlo.unary_einsum_v1"(%[[ARG0]]) <{ - // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab->a"> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x!vhlo.f32_v1> - %0 = "stablehlo.unary_einsum"(%arg0) { -@@ -2020,22 +2177,25 @@ - } - - // CHECK-LABEL: "op_uniform_dequantize" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_uniform_dequantize(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.uniform_dequantize_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.uniform_dequantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.uniform_dequantize"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_uniform_quantize" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_uniform_quantize(%arg0: tensor) -> tensor> { -- // CHECK: "vhlo.uniform_quantize_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.uniform_quantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1> - %0 = "stablehlo.uniform_quantize"(%arg0) : (tensor) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "op_while" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_while(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.while_v1"(%arg0) ({ -+ // CHECK: "vhlo.while_v1"(%[[ARG0]]) ({ - // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1): - // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }, { -@@ -2053,8 +2213,9 @@ - } - - // CHECK-LABEL: "op_xor" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_xor(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.xor_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.xor_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.xor"(%arg0, %arg1) : (tensor, tensor) -> tensor - 
- func.return %0 : tensor<i1>
- }
-@@ -2062,190 +2223,217 @@
-
- // ============ TYPES ============
-
- // CHECK-LABEL: "type_i1"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i1(%arg0: tensor<i1>, %arg1: tensor<i1>) -> tensor<i1> {
-- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.i1_v1>, !vhlo.tensor_v1<!vhlo.i1_v1>) -> !vhlo.tensor_v1<!vhlo.i1_v1>
-+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.i1_v1>, !vhlo.tensor_v1<!vhlo.i1_v1>) -> !vhlo.tensor_v1<!vhlo.i1_v1>
- %0 = "stablehlo.and"(%arg0, %arg1) : (tensor<i1>, tensor<i1>) -> tensor<i1>
- func.return %0 : tensor<i1>
- }
-
- // CHECK-LABEL: "type_i4"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i4(%arg0: tensor<i4>, %arg1: tensor<i4>) -> tensor<i4> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.i4_v1>, !vhlo.tensor_v1<!vhlo.i4_v1>) -> !vhlo.tensor_v1<!vhlo.i4_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.i4_v1>, !vhlo.tensor_v1<!vhlo.i4_v1>) -> !vhlo.tensor_v1<!vhlo.i4_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<i4>, tensor<i4>) -> tensor<i4>
- func.return %0 : tensor<i4>
- }
-
- // CHECK-LABEL: "type_i8"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i8(%arg0: tensor<i8>, %arg1: tensor<i8>) -> tensor<i8> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.i8_v1>, !vhlo.tensor_v1<!vhlo.i8_v1>) -> !vhlo.tensor_v1<!vhlo.i8_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.i8_v1>, !vhlo.tensor_v1<!vhlo.i8_v1>) -> !vhlo.tensor_v1<!vhlo.i8_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<i8>, tensor<i8>) -> tensor<i8>
- func.return %0 : tensor<i8>
- }
-
- // CHECK-LABEL: "type_i16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i16(%arg0: tensor<i16>, %arg1: tensor<i16>) -> tensor<i16> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.i16_v1>, !vhlo.tensor_v1<!vhlo.i16_v1>) -> !vhlo.tensor_v1<!vhlo.i16_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.i16_v1>, !vhlo.tensor_v1<!vhlo.i16_v1>) -> !vhlo.tensor_v1<!vhlo.i16_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<i16>, tensor<i16>) -> tensor<i16>
- func.return %0 : tensor<i16>
- }
-
- // CHECK-LABEL: "type_i32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i32(%arg0: tensor<i32>, %arg1: tensor<i32>) -> tensor<i32> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.i32_v1>, !vhlo.tensor_v1<!vhlo.i32_v1>) -> !vhlo.tensor_v1<!vhlo.i32_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.i32_v1>, !vhlo.tensor_v1<!vhlo.i32_v1>) -> !vhlo.tensor_v1<!vhlo.i32_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> tensor<i32>
- func.return %0 : tensor<i32>
- }
-
- // CHECK-LABEL: "type_i64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i64(%arg0: tensor<i64>, %arg1: tensor<i64>) -> tensor<i64> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.i64_v1>, !vhlo.tensor_v1<!vhlo.i64_v1>) -> !vhlo.tensor_v1<!vhlo.i64_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.i64_v1>, !vhlo.tensor_v1<!vhlo.i64_v1>) -> !vhlo.tensor_v1<!vhlo.i64_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<i64>, tensor<i64>) -> tensor<i64>
- func.return %0 : tensor<i64>
- }
-
- // CHECK-LABEL: "type_ui4"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui4(%arg0: tensor<ui4>, %arg1: tensor<ui4>) -> tensor<ui4> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.ui4_v1>, !vhlo.tensor_v1<!vhlo.ui4_v1>) -> !vhlo.tensor_v1<!vhlo.ui4_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.ui4_v1>, !vhlo.tensor_v1<!vhlo.ui4_v1>) -> !vhlo.tensor_v1<!vhlo.ui4_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<ui4>, tensor<ui4>) -> tensor<ui4>
- func.return %0 : tensor<ui4>
- }
-
- // CHECK-LABEL: "type_ui8"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui8(%arg0: tensor<ui8>, %arg1: tensor<ui8>) -> tensor<ui8> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.ui8_v1>, !vhlo.tensor_v1<!vhlo.ui8_v1>) -> !vhlo.tensor_v1<!vhlo.ui8_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.ui8_v1>, !vhlo.tensor_v1<!vhlo.ui8_v1>) -> !vhlo.tensor_v1<!vhlo.ui8_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<ui8>, tensor<ui8>) -> tensor<ui8>
- func.return %0 : tensor<ui8>
- }
-
- // CHECK-LABEL: "type_ui16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui16(%arg0: tensor<ui16>, %arg1: tensor<ui16>) -> tensor<ui16> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.ui16_v1>, !vhlo.tensor_v1<!vhlo.ui16_v1>) -> !vhlo.tensor_v1<!vhlo.ui16_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.ui16_v1>, !vhlo.tensor_v1<!vhlo.ui16_v1>) -> !vhlo.tensor_v1<!vhlo.ui16_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<ui16>, tensor<ui16>) -> tensor<ui16>
- func.return %0 : tensor<ui16>
- }
-
- // CHECK-LABEL: "type_ui32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui32(%arg0: tensor<ui32>, %arg1: tensor<ui32>) -> tensor<ui32> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.ui32_v1>, !vhlo.tensor_v1<!vhlo.ui32_v1>) -> !vhlo.tensor_v1<!vhlo.ui32_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.ui32_v1>, !vhlo.tensor_v1<!vhlo.ui32_v1>) -> !vhlo.tensor_v1<!vhlo.ui32_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<ui32>, tensor<ui32>) -> tensor<ui32>
- func.return %0 : tensor<ui32>
- }
-
- // CHECK-LABEL: "type_ui64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui64(%arg0: tensor<ui64>, %arg1: tensor<ui64>) -> tensor<ui64> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.ui64_v1>, !vhlo.tensor_v1<!vhlo.ui64_v1>) -> !vhlo.tensor_v1<!vhlo.ui64_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.ui64_v1>, !vhlo.tensor_v1<!vhlo.ui64_v1>) -> !vhlo.tensor_v1<!vhlo.ui64_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<ui64>, tensor<ui64>) -> tensor<ui64>
- func.return %0 : tensor<ui64>
- }
-
- // CHECK-LABEL: "type_f8E4M3FN"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E4M3FN(%arg0: tensor<f8E4M3FN>, %arg1: tensor<f8E4M3FN>) -> tensor<f8E4M3FN> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f8E4M3FN_v1>, !vhlo.tensor_v1<!vhlo.f8E4M3FN_v1>) -> !vhlo.tensor_v1<!vhlo.f8E4M3FN_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f8E4M3FN_v1>, !vhlo.tensor_v1<!vhlo.f8E4M3FN_v1>) -> !vhlo.tensor_v1<!vhlo.f8E4M3FN_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f8E4M3FN>, tensor<f8E4M3FN>) -> tensor<f8E4M3FN>
- func.return %0 : tensor<f8E4M3FN>
- }
-
- // CHECK-LABEL: "type_f8E5M2"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E5M2(%arg0: tensor<f8E5M2>, %arg1: tensor<f8E5M2>) -> tensor<f8E5M2> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f8E5M2_v1>, !vhlo.tensor_v1<!vhlo.f8E5M2_v1>) -> !vhlo.tensor_v1<!vhlo.f8E5M2_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f8E5M2_v1>, !vhlo.tensor_v1<!vhlo.f8E5M2_v1>) -> !vhlo.tensor_v1<!vhlo.f8E5M2_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f8E5M2>, tensor<f8E5M2>) -> tensor<f8E5M2>
- func.return %0 : tensor<f8E5M2>
- }
-
- // CHECK-LABEL: "type_f8E4M3FNUZ"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E4M3FNUZ(%arg0: tensor<f8E4M3FNUZ>, %arg1: tensor<f8E4M3FNUZ>) -> tensor<f8E4M3FNUZ> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f8E4M3FNUZ_v1>, !vhlo.tensor_v1<!vhlo.f8E4M3FNUZ_v1>) -> !vhlo.tensor_v1<!vhlo.f8E4M3FNUZ_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f8E4M3FNUZ_v1>, !vhlo.tensor_v1<!vhlo.f8E4M3FNUZ_v1>) -> !vhlo.tensor_v1<!vhlo.f8E4M3FNUZ_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f8E4M3FNUZ>, tensor<f8E4M3FNUZ>) -> tensor<f8E4M3FNUZ>
- func.return %0 : tensor<f8E4M3FNUZ>
- }
-
- // CHECK-LABEL: "type_f8E4M3B11FNUZ"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E4M3B11FNUZ(%arg0: tensor<f8E4M3B11FNUZ>, %arg1: tensor<f8E4M3B11FNUZ>) -> tensor<f8E4M3B11FNUZ> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f8E4M3B11FNUZ_v1>, !vhlo.tensor_v1<!vhlo.f8E4M3B11FNUZ_v1>) -> !vhlo.tensor_v1<!vhlo.f8E4M3B11FNUZ_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f8E4M3B11FNUZ_v1>, !vhlo.tensor_v1<!vhlo.f8E4M3B11FNUZ_v1>) -> !vhlo.tensor_v1<!vhlo.f8E4M3B11FNUZ_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f8E4M3B11FNUZ>, tensor<f8E4M3B11FNUZ>) -> tensor<f8E4M3B11FNUZ>
- func.return %0 : tensor<f8E4M3B11FNUZ>
- }
-
- // CHECK-LABEL: "type_f8E5M2FNUZ"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E5M2FNUZ(%arg0: tensor<f8E5M2FNUZ>, %arg1: tensor<f8E5M2FNUZ>) -> tensor<f8E5M2FNUZ> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f8E5M2FNUZ_v1>, !vhlo.tensor_v1<!vhlo.f8E5M2FNUZ_v1>) -> !vhlo.tensor_v1<!vhlo.f8E5M2FNUZ_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f8E5M2FNUZ_v1>, !vhlo.tensor_v1<!vhlo.f8E5M2FNUZ_v1>) -> !vhlo.tensor_v1<!vhlo.f8E5M2FNUZ_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f8E5M2FNUZ>, tensor<f8E5M2FNUZ>) -> tensor<f8E5M2FNUZ>
- func.return %0 : tensor<f8E5M2FNUZ>
- }
-
- // CHECK-LABEL: "type_bf16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_bf16(%arg0: tensor<bf16>, %arg1: tensor<bf16>) -> tensor<bf16> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.bf16_v1>, !vhlo.tensor_v1<!vhlo.bf16_v1>) -> !vhlo.tensor_v1<!vhlo.bf16_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.bf16_v1>, !vhlo.tensor_v1<!vhlo.bf16_v1>) -> !vhlo.tensor_v1<!vhlo.bf16_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<bf16>, tensor<bf16>) -> tensor<bf16>
- func.return %0 : tensor<bf16>
- }
-
- // CHECK-LABEL: "type_f16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f16(%arg0: tensor<f16>, %arg1: tensor<f16>) -> tensor<f16> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f16_v1>, !vhlo.tensor_v1<!vhlo.f16_v1>) -> !vhlo.tensor_v1<!vhlo.f16_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f16_v1>, !vhlo.tensor_v1<!vhlo.f16_v1>) -> !vhlo.tensor_v1<!vhlo.f16_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f16>, tensor<f16>) -> tensor<f16>
- func.return %0 : tensor<f16>
- }
-
- // CHECK-LABEL: "type_f32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f32(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<f32> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f32_v1>, !vhlo.tensor_v1<!vhlo.f32_v1>) -> !vhlo.tensor_v1<!vhlo.f32_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f32_v1>, !vhlo.tensor_v1<!vhlo.f32_v1>) -> !vhlo.tensor_v1<!vhlo.f32_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
- func.return %0 : tensor<f32>
- }
-
- // CHECK-LABEL: "type_f64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f64(%arg0: tensor<f64>, %arg1: tensor<f64>) -> tensor<f64> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f64_v1>, !vhlo.tensor_v1<!vhlo.f64_v1>) -> !vhlo.tensor_v1<!vhlo.f64_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f64_v1>, !vhlo.tensor_v1<!vhlo.f64_v1>) -> !vhlo.tensor_v1<!vhlo.f64_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f64>, tensor<f64>) -> tensor<f64>
- func.return %0 : tensor<f64>
- }
-
- // CHECK-LABEL: "type_complex_f32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_complex_f32(%arg0: tensor<complex<f32>>, %arg1: tensor<complex<f32>>) -> tensor<complex<f32>> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f32_v1>>, !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f32_v1>>) -> !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f32_v1>>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f32_v1>>, !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f32_v1>>) -> !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f32_v1>>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<complex<f32>>, tensor<complex<f32>>) -> tensor<complex<f32>>
- func.return %0 : tensor<complex<f32>>
- }
-
- // CHECK-LABEL: "type_complex_f64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_complex_f64(%arg0: tensor<complex<f64>>, %arg1: tensor<complex<f64>>) -> tensor<complex<f64>> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f64_v1>>, !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f64_v1>>) -> !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f64_v1>>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f64_v1>>, !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f64_v1>>) -> !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f64_v1>>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<complex<f64>>, tensor<complex<f64>>) -> tensor<complex<f64>>
- func.return %0 : tensor<complex<f64>>
- }
-
- // CHECK-LABEL: "type_dynamism_ranked"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_dynamism_ranked(%arg0: tensor<?xf32>) -> tensor<?xf32> {
-- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1<?x!vhlo.f32_v1>) -> !vhlo.tensor_v1<?x!vhlo.f32_v1>
-+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1<?x!vhlo.f32_v1>) -> !vhlo.tensor_v1<?x!vhlo.f32_v1>
- %0 = "stablehlo.abs"(%arg0) : (tensor<?xf32>) -> tensor<?xf32>
- func.return %0 : tensor<?xf32>
- }
-
- // CHECK-LABEL: "type_quantization"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_quantization(%arg0: tensor<!quant.uniform<i8:f32, 34.0:16>>, %arg1: tensor<!quant.uniform<i8:f32, 34.0:16>>) -> tensor<!quant.uniform<i8:f32, 34.0:16>> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.quant_v1<!vhlo.i8_v1:!vhlo.f32_v1, 3.400000e+01:16, -128:127, 1>>, !vhlo.tensor_v1<!vhlo.quant_v1<!vhlo.i8_v1:!vhlo.f32_v1, 3.400000e+01:16, -128:127, 1>>) -> !vhlo.tensor_v1<!vhlo.quant_v1<!vhlo.i8_v1:!vhlo.f32_v1, 3.400000e+01:16, -128:127, 1>>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.quant_v1<!vhlo.i8_v1:!vhlo.f32_v1, 3.400000e+01:16, -128:127, 1>>, !vhlo.tensor_v1<!vhlo.quant_v1<!vhlo.i8_v1:!vhlo.f32_v1, 3.400000e+01:16, -128:127, 1>>) -> !vhlo.tensor_v1<!vhlo.quant_v1<!vhlo.i8_v1:!vhlo.f32_v1, 3.400000e+01:16, -128:127, 1>>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<!quant.uniform<i8:f32, 34.0:16>>, tensor<!quant.uniform<i8:f32, 34.0:16>>) -> tensor<!quant.uniform<i8:f32, 34.0:16>>
- func.return %0 : tensor<!quant.uniform<i8:f32, 34.0:16>>
- }
-
- // CHECK: function_type = #vhlo.type_v1<!vhlo.func_v1<(!vhlo.token_v1) -> !vhlo.token_v1>>
- // CHECK-LABEL: "type_token_callee"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_token_callee(%arg0: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.token_v1) -> ()
-+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> ()
- return %arg0 : !stablehlo.token
- }
-
- // CHECK: function_type = #vhlo.type_v1<!vhlo.func_v1<(!vhlo.token_v1) -> !vhlo.token_v1>>
- // CHECK-LABEL: "type_token_caller"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_token_caller(%arg0: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.call_v1"(%arg0) <{callee = #vhlo.string_v1<"type_token_callee">}
-+ // CHECK: "vhlo.call_v1"(%[[ARG0]]) <{callee = #vhlo.string_v1<"type_token_callee">}
- // CHECK-SAME: (!vhlo.token_v1) -> !vhlo.token_v1
- %0 = func.call @type_token_callee(%arg0) : (!stablehlo.token) -> !stablehlo.token
- return %0 : !stablehlo.token
- }
-
- // CHECK-LABEL: "type_tuple"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_tuple(%arg0: tuple<tensor<f32>>) -> tuple<!stablehlo.token> {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo"
-diff --ruN a/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_14_0.mlir b/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_14_0.mlir
---- stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_14_0.mlir
-+++ stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_14_0.mlir
-@@ -13,6 +13,7 @@
-
- // ============ ATTRIBUTES ============
-
- // CHECK-LABEL: "attr_comparison_direction_eq"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_eq(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<i1> {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo<comparison_direction_v1 EQ>
-@@ -22,6 +23,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_ne"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_ne(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<i1> {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo<comparison_direction_v1 NE>
-@@ -31,6 +33,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_ge"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_ge(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<i1> {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo<comparison_direction_v1 GE>
-@@ -40,6 +43,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_gt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_gt(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<i1> {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo<comparison_direction_v1 GT>
-@@ -49,6 +53,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_le"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_le(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<i1> {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo<comparison_direction_v1 LE>
-@@ -58,6 +63,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_lt"
-+// CHECK-NEXT:
(%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_lt(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -67,6 +73,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_notype" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_notype(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo -@@ -76,6 +83,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_float" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_float(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -86,6 +94,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_totalorder" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_totalorder(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -96,6 +105,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_signed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_signed(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -106,6 +116,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_unsigned" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_unsigned(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -118,6 +129,7 @@ - // ConvDimensionNumbers aka #stablehlo.conv is covered below. - - // CHECK-LABEL: "attr_custom_call_api_version_unspecified" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_unspecified(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -128,6 +140,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_original" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_original(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -138,6 +151,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_status_returning" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_status_returning(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -148,6 +162,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_status_returning_unified" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_status_returning_unified(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -160,6 +175,7 @@ - // DotDimensionNumbers aka #stablehlo.dot is covered below. 
- - // CHECK-LABEL: "attr_fft_type_fft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -170,6 +186,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_ifft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_ifft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -180,6 +197,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_rfft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_rfft(%arg0: tensor<16xf32>) -> tensor<9xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -190,6 +208,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_irfft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_irfft(%arg0: tensor<9xcomplex>) -> tensor<16xf32> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -202,6 +221,7 @@ - // GatherDimensionNumbers aka #stablehlo.gather is covered below. - - // CHECK-LABEL: "attr_precision_config_default" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_default(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -210,6 +230,7 @@ - } - - // CHECK-LABEL: "attr_precision_config_high" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_high(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -219,6 +240,7 @@ - } - - // CHECK-LABEL: "attr_precision_config_highest" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_highest(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -228,6 +250,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_default" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_default(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -237,6 +260,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_three_fry" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_three_fry(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -246,6 +270,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_philox" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_philox(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -255,6 +280,7 @@ - } - - // CHECK-LABEL: "attr_rng_distribution_uniform" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @attr_rng_distribution_uniform(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { - %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) { - // CHECK: rng_distribution = #vhlo -@@ -264,6 +290,7 @@ - } - - // CHECK-LABEL: "attr_rng_distribution_normal" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @attr_rng_distribution_normal(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { - %0 = 
"stablehlo.rng"(%arg0, %arg1, %arg2) { - // CHECK: rng_distribution = #vhlo -@@ -275,6 +302,7 @@ - // ScatterDimensionNumbers aka #stablehlo.scatter is covered below. - - // CHECK-LABEL: "attr_transpose_no_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_no_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -287,6 +315,7 @@ - } - - // CHECK-LABEL: "attr_transpose_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -299,6 +328,7 @@ - } - - // CHECK-LABEL: "attr_transpose_adjoint" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_adjoint(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -313,10 +343,9 @@ - // TypeExtensionsAttr aka #stablehlo.type_extensions is covered below. - - // CHECK-LABEL: "attr_type_extensions_bounds" --func.func @attr_type_extensions_bounds( -- %arg0: tensor>) -- -> tensor> { -- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1>) -> () -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) -+func.func @attr_type_extensions_bounds(%arg0: tensor>) -> tensor> { -+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> () - func.return %arg0 : tensor> - } - -@@ -324,8 +353,9 @@ - // ============ DEFAULTS ============ - - // CHECK-LABEL: "default_all_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.all_gather_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{ - // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64> - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -339,8 +369,9 @@ - } - - // CHECK-LABEL: "default_all_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_reduce(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.all_reduce_v1"(%arg0) -+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]]) - // CHECK-SAME: <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -362,8 +393,9 @@ - } - - // CHECK-LABEL: "default_all_to_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> { -- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>, -@@ -380,8 +412,9 @@ - } - - // CHECK-LABEL: "default_cholesky" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> { -- // CHECK: "vhlo.cholesky_v1"(%arg0) <{ -+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{ - // CHECK-SAME: lower = #vhlo.bool_v1 - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1> - %0 = "stablehlo.cholesky"(%arg0) : (tensor<1x16x16xf32>) -> tensor<1x16x16xf32> -@@ -389,8 +422,9 @@ - } - - // CHECK-LABEL: 
"default_collective_permute" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: "vhlo.collective_permute_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -401,8 +435,9 @@ - } - - // CHECK-LABEL: "default_compare" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_compare(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: compare_type = #vhlo, - // CHECK-SAME: comparison_direction = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -413,8 +448,9 @@ - } - - // CHECK-LABEL: "default_convolution" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x6x6x16xf32> { -- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -442,8 +478,9 @@ - } - - // CHECK-LABEL: "default_custom_call" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_custom_call(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.custom_call_v1"(%arg0) <{ -+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{ - // CHECK-SAME: api_version = #vhlo, - // CHECK-SAME: backend_config = #vhlo.string_v1<"">, - // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">, -@@ -460,8 +497,9 @@ - } - - // CHECK-LABEL: "default_dot_general" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> { -- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, -@@ -480,8 +518,9 @@ - } - - // CHECK-LABEL: "default_dot" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.dot"(%arg0, %arg1) : (tensor<8x16xf32>, tensor<16x8xf32>) -> tensor<8x8xf32> -@@ -489,8 +528,9 @@ - } - - // CHECK-LABEL: "default_dynamic_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : 
tensor<2xi64>>, - // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>>, - // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>> -@@ -502,8 +542,9 @@ - } - - // CHECK-LABEL: "default_dynamic_conv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> { -- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -531,8 +572,9 @@ - } - - // CHECK-LABEL: "default_dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> { -- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -558,15 +600,16 @@ - // CHECK-SAME: sym_name = #vhlo.string_v1<"default_func">, - // CHECK-SAME: sym_visibility = #vhlo.string_v1<""> - // CHECK-SAME: }> ({ -- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1): -- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1): -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : () -> () - func.return %arg0 : tensor - } - - // CHECK-LABEL: "dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> { -- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -587,8 +630,9 @@ - } - - // CHECK-LABEL: "default_infeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.infeed_v1"(%arg0) <{ -+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{ - // CHECK-SAME: infeed_config = #vhlo.string_v1<"">, - // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[]> - // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) -@@ -597,8 +641,9 @@ - } - - // CHECK-LABEL: "default_outfeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: outfeed_config = #vhlo.string_v1<""> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.outfeed"(%arg0, %arg1) : (tensor, !stablehlo.token) -> !stablehlo.token -@@ -606,8 +651,9 @@ - } - - // CHECK-LABEL: "default_recv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_recv(%arg0: !stablehlo.token) -> (tensor, 
!stablehlo.token) { -- // CHECK: "vhlo.recv_v1"(%arg0) <{ -+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -619,8 +665,9 @@ - } - - // CHECK-LABEL: "default_send" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -632,8 +679,9 @@ - } - - // CHECK-LABEL: "default_reduce_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64> -@@ -655,8 +703,9 @@ - } - - // CHECK-LABEL: "default_reduce_window" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x16x30x7xf32> { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, -@@ -678,8 +727,9 @@ - } - - // CHECK-LABEL: "default_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> { -- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, - // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>, -@@ -707,8 +757,9 @@ - } - - // CHECK-LABEL: "default_select_and_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<10x23x23x64xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> { -- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>> -@@ -736,8 +787,9 @@ - } - - // CHECK-LABEL: "default_sort" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.sort_v1"(%arg0) <{ -+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<-1 : i64> - // CHECK-SAME: is_stable = #vhlo.bool_v1 - // CHECK-SAME: }> ({ -@@ -756,29 +808,33 @@ - // ============ OPS ============ - - // CHECK-LABEL: "op_abs" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func 
@op_abs(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_add" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_add(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_after_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_after_all(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.after_all_v1"(%arg0) : (!vhlo.token_v1) -> !vhlo.token_v1 -+ // CHECK: "vhlo.after_all_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.after_all"(%arg0) : (!stablehlo.token) -> !stablehlo.token - func.return %0 : !stablehlo.token - } - - // CHECK-LABEL: "op_all_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.all_gather_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{ - // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64> - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -794,8 +850,9 @@ - } - - // CHECK-LABEL: "op_all_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_reduce(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.all_reduce_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: use_global_device_ids = #vhlo.bool_v1 -@@ -817,8 +874,9 @@ - } - - // CHECK-LABEL: "op_all_to_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> { -- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>, -@@ -836,22 +894,25 @@ - } - - // CHECK-LABEL: "op_and" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_and(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_atan2" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_atan2(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.atan2_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.atan2_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.atan2"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_batch_norm_grad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, 
%[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_batch_norm_grad(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16x16x16x16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { -- // CHECK: "vhlo.batch_norm_grad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ -+ // CHECK: "vhlo.batch_norm_grad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -@@ -863,8 +924,9 @@ - } - - // CHECK-LABEL: "op_batch_norm_inference" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_batch_norm_inference(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16xf32>) -> tensor<16x16x16x16xf32> { -- // CHECK: "vhlo.batch_norm_inference_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ -+ // CHECK: "vhlo.batch_norm_inference_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1> -@@ -876,8 +938,9 @@ - } - - // CHECK-LABEL: "op_batch_norm_training" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_batch_norm_training(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { -- // CHECK: "vhlo.batch_norm_training_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.batch_norm_training_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -@@ -889,15 +952,17 @@ - } - - // CHECK-LABEL: "op_bitcast_convert" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_bitcast_convert(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.bitcast_convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.bitcast_convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.bitcast_convert"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_broadcast_in_dim(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.broadcast_in_dim_v1"(%arg0) <{ -+ // CHECK: "vhlo.broadcast_in_dim_v1"(%[[ARG0]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> 
!vhlo.tensor_v1<16x16x!vhlo.f32_v1> - %0 = "stablehlo.broadcast_in_dim"(%arg0) { -@@ -907,8 +972,9 @@ - } - - // CHECK-LABEL: "op_broadcast" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_broadcast(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.broadcast_v1"(%arg0) <{ -+ // CHECK: "vhlo.broadcast_v1"(%[[ARG0]]) <{ - // CHECK-SAME: broadcast_sizes = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1> - %0 = "stablehlo.broadcast"(%arg0) { -@@ -918,9 +984,10 @@ - } - - // CHECK-LABEL: "op_case" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_case(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.case_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.case"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -929,22 +996,25 @@ - } - - // CHECK-LABEL: "op_cbrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cbrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cbrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.cbrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cbrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_ceil" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_ceil(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.ceil_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.ceil_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.ceil"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_cholesky" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> { -- // CHECK: "vhlo.cholesky_v1"(%arg0) <{ -+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{ - // CHECK-SAME: lower = #vhlo.bool_v1 - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1> - %0 = "stablehlo.cholesky"(%arg0) { -@@ -954,22 +1024,25 @@ - } - - // CHECK-LABEL: "op_clamp" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_clamp(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.clamp_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.clamp_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.clamp"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_count_leading_zeros" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_count_leading_zeros(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.count_leading_zeros_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.count_leading_zeros_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.count_leading_zeros"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_collective_permute" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: "vhlo.collective_permute_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{ - // CHECK-SAME: 
channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -981,8 +1054,9 @@ - } - - // CHECK-LABEL: "op_compare" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_compare(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: compare_type = #vhlo, - // CHECK-SAME: comparison_direction = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -994,15 +1068,17 @@ - } - - // CHECK-LABEL: "op_complex" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_complex(%arg0: tensor, %arg1: tensor) -> tensor> { -- // CHECK: "vhlo.complex_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.complex_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1> - %0 = "stablehlo.complex"(%arg0, %arg1) : (tensor, tensor) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "op_concatenate" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_concatenate(%arg0: tensor<8xf32>, %arg1: tensor<8xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.concatenate_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.concatenate_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x!vhlo.f32_v1>, !vhlo.tensor_v1<8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.concatenate"(%arg0, %arg1) { -@@ -1012,6 +1088,7 @@ - } - - // CHECK-LABEL: "op_constant" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_constant(%arg0: tensor) -> tensor { - // CHECK: "vhlo.constant_v1"() <{ - // CHECK-SAME: value = #vhlo.tensor_v1 : tensor> -@@ -1023,15 +1100,17 @@ - } - - // CHECK-LABEL: "op_convert" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_convert(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.convert"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_convolution" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x7x7x16xf32> { -- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -1065,8 +1144,9 @@ - } - - // CHECK-LABEL: "op_cosine" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cosine(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cosine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.cosine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cosine"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } -@@ -1079,8 +1159,9 @@ - } - - // CHECK-LABEL: "op_cross_replica_sum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cross_replica_sum(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cross-replica-sum_v1"(%arg0) <{ -+ // CHECK: "vhlo.cross-replica-sum_v1"(%[[ARG0]]) <{ - // 
CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cross-replica-sum"(%arg0) { -@@ -1090,8 +1171,9 @@ - } - - // CHECK-LABEL: "op_custom_call" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_custom_call(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.custom_call_v1"(%arg0) <{ -+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{ - // CHECK-SAME: api_version = #vhlo, - // CHECK-SAME: backend_config = #vhlo.string_v1<"\08\03\1A\02">, - // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">, -@@ -1122,15 +1204,17 @@ - } - - // CHECK-LABEL: "op_divide" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_divide(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.divide_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.divide_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.divide"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dot_general" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> { -- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, -@@ -1150,8 +1234,9 @@ - } - - // CHECK-LABEL: "op_dot" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.dot"(%arg0, %arg1) { -@@ -1161,8 +1246,9 @@ - } - - // CHECK-LABEL: "op_dynamic_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1176,8 +1262,9 @@ - } - - // CHECK-LABEL: "op_dynamic_conv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> { -- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -1211,8 +1298,9 @@ - } - - // CHECK-LABEL: "op_dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - 
func.func @op_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> {
-- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
- // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>,
- // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
-@@ -1232,8 +1320,9 @@
- }
-
- // CHECK-LABEL: "op_dynamic_iota"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_dynamic_iota(%arg0: tensor<1xindex>) -> tensor {
-- // CHECK: "vhlo.dynamic_iota_v1"(%arg0) <{
-+ // CHECK: "vhlo.dynamic_iota_v1"(%[[ARG0]]) <{
- // CHECK-SAME: iota_dimension = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.dynamic_iota"(%arg0) {
-@@ -1243,22 +1332,25 @@
- }
-
- // CHECK-LABEL: "op_dynamic_pad"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}})
- func.func @op_dynamic_pad(%arg0: tensor, %arg1: tensor, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>, %arg4: tensor<1xindex>) -> tensor {
-- // CHECK: "vhlo.dynamic_pad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.dynamic_pad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.dynamic_pad"(%arg0, %arg1, %arg2, %arg3, %arg4) : (tensor, tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_dynamic_reshape"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_dynamic_reshape(%arg0: tensor<16xf32>, %arg1: tensor<2xindex>) -> tensor {
-- // CHECK: "vhlo.dynamic_reshape_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.dynamic_reshape_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.dynamic_reshape"(%arg0, %arg1) : (tensor<16xf32>, tensor<2xindex>) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_dynamic_slice"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_dynamic_slice(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor<4xf32> {
-- // CHECK: "vhlo.dynamic_slice_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.dynamic_slice_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: slice_sizes = #vhlo.tensor_v1 : tensor<1xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<4x!vhlo.f32_v1>
- %0 = "stablehlo.dynamic_slice"(%arg0, %arg1) {
-@@ -1268,15 +1360,17 @@
- }
-
- // CHECK-LABEL: "op_dynamic_update_slice"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_dynamic_update_slice(%arg0: tensor<16xf32>, %arg1: tensor<4xf32>, %arg2: tensor) -> tensor<16xf32> {
-- // CHECK: "vhlo.dynamic_update_slice_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
-+ // CHECK: "vhlo.dynamic_update_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
- %0 = "stablehlo.dynamic_update_slice"(%arg0, %arg1, %arg2) : (tensor<16xf32>, tensor<4xf32>, tensor) -> tensor<16xf32>
- func.return %0 : tensor<16xf32>
- }
-
- // CHECK-LABEL: "op_einsum"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_einsum(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
-- // CHECK: "vhlo.einsum_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.einsum_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab,bc->ac">
- // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1>
- %0 = "stablehlo.einsum"(%arg0, %arg1) {
-@@ -1286,22 +1380,25 @@
- }
-
- // CHECK-LABEL: "op_exponential_minus_one"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_exponential_minus_one(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.exponential_minus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.exponential_minus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.exponential_minus_one"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_exponential"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_exponential(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.exponential_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.exponential_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.exponential"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_fft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> {
-- // CHECK: "vhlo.fft_v1"(%arg0) <{
-+ // CHECK: "vhlo.fft_v1"(%[[ARG0]]) <{
- // CHECK-SAME: fft_length = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: fft_type = #vhlo
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.complex_v1>) -> !vhlo.tensor_v1<16x!vhlo.complex_v1>
-@@ -1313,8 +1410,9 @@
- }
-
- // CHECK-LABEL: "op_floor"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_floor(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.floor_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.floor_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.floor"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-@@ -1327,16 +1425,17 @@
- // CHECK-SAME: sym_name = #vhlo.string_v1<"op_func">,
- // CHECK-SAME: sym_visibility = #vhlo.string_v1<"private">
- // CHECK-SAME: }> ({
-- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1):
-- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> ()
-+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1):
-+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }) : () -> ()
-
- func.return %arg0 : tensor
- }
-
- // CHECK-LABEL: "op_gather"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> {
-- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
- // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>,
- // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
-@@ -1358,8 +1457,9 @@
- }
-
- // CHECK-LABEL: "op_get_dimension_size"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_get_dimension_size(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.get_dimension_size_v1"(%arg0) <{
-+ // CHECK: "vhlo.get_dimension_size_v1"(%[[ARG0]]) <{
- // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.get_dimension_size"(%arg0) {
-@@ -1369,8 +1469,9 @@
- }
-
- // CHECK-LABEL: "op_get_tuple_element"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_get_tuple_element(%arg0: tuple, tensor>) -> tensor {
-- // CHECK: "vhlo.get_tuple_element_v1"(%arg0) <{
-+ // CHECK: "vhlo.get_tuple_element_v1"(%[[ARG0]]) <{
- // CHECK-SAME: index = #vhlo.integer_v1<0 : i32>
- // CHECK-SAME: }> : (!vhlo.tuple_v1, !vhlo.tensor_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.get_tuple_element"(%arg0) {
-@@ -1380,11 +1481,12 @@
- }
-
- // CHECK-LABEL: "op_if"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_if(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor {
-- // CHECK: "vhlo.if_v1"(%arg0) ({
-- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> ()
-+ // CHECK: "vhlo.if_v1"(%[[ARG0]]) ({
-+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }, {
-- // CHECK-NEXT: "vhlo.return_v1"(%arg2) : (!vhlo.tensor_v1) -> ()
-+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG2]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.if"(%arg0) ({
- "stablehlo.return"(%arg1) : (tensor) -> ()
-@@ -1395,15 +1497,17 @@
- }
-
- // CHECK-LABEL: "op_imag"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_imag(%arg0: tensor>) -> tensor {
-- // CHECK: "vhlo.imag_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.imag_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.imag"(%arg0) : (tensor>) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_infeed"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) {
-- // CHECK: "vhlo.infeed_v1"(%arg0) <{
-+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{
- // CHECK-SAME: infeed_config = #vhlo.string_v1<"foo">,
- // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[#vhlo.array_v1<[]>]>
- // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1)
-@@ -1426,36 +1530,41 @@
- }
-
- // CHECK-LABEL: "op_is_finite"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_is_finite(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.is_finite_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.is_finite_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.is_finite"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_log"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_log(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.log_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.log_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.log"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_log_plus_one"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_log_plus_one(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.log_plus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.log_plus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.log_plus_one"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_logistic"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_logistic(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.logistic_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.logistic_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.logistic"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_map"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_map(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.map_v1"(%arg0) <{
-+ // CHECK: "vhlo.map_v1"(%[[ARG0]]) <{
- // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>>
- // CHECK-SAME: }> ({
- // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1):
-@@ -1473,57 +1582,65 @@
- }
-
- // CHECK-LABEL: "op_maximum"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_maximum(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.maximum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.maximum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.maximum"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_minimum"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_minimum(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.minimum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.minimum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.minimum"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_multiply"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_multiply(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.multiply_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.multiply_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.multiply"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_negate"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_negate(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.negate_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.negate_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.negate"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_not"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_not(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.not_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.not_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.not"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_optimization_barrier"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_optimization_barrier(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.optimization_barrier_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.optimization_barrier_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.optimization_barrier"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_or"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_or(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.or_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.or_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.or"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_outfeed"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: outfeed_config = #vhlo.string_v1<"foo">
- // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1
- %0 = "stablehlo.outfeed"(%arg0, %arg1) {
-@@ -1533,8 +1650,9 @@
- }
-
- // CHECK-LABEL: "op_pad"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_pad(%arg0: tensor<8xf32>, %arg1: tensor) -> tensor<16xf32> {
-- // CHECK: "vhlo.pad_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.pad_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: edge_padding_high = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: edge_padding_low = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: interior_padding = #vhlo.tensor_v1 : tensor<1xi64>>
-@@ -1548,36 +1666,41 @@
- }
-
- // CHECK-LABEL: "op_popcnt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_popcnt(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.popcnt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.popcnt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.popcnt"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_power"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_power(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.power_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.power_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.power"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_real_dynamic_slice"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}})
- func.func @op_real_dynamic_slice(%arg0: tensor, %arg1: tensor<1xindex>, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>) -> tensor {
-- // CHECK: "vhlo.real_dynamic_slice_v1"(%arg0, %arg1, %arg2, %arg3) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.real_dynamic_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.real_dynamic_slice"(%arg0, %arg1, %arg2, %arg3) : (tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_real"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_real(%arg0: tensor>) -> tensor {
-- // CHECK: "vhlo.real_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.real_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.real"(%arg0) : (tensor>) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_recv"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) {
-- // CHECK: "vhlo.recv_v1"(%arg0) <{
-+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME: channel_type = #vhlo.integer_v1<3 : i64>,
- // CHECK-SAME: is_host_transfer = #vhlo.bool_v1
-@@ -1590,8 +1713,9 @@
- }
-
- // CHECK-LABEL: "op_reduce"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_reduce(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.reduce_v1"(%arg0, %arg1)
-+ // CHECK: "vhlo.reduce_v1"(%[[ARG0]], %[[ARG1]])
- // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1):
- // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> ()
- // CHECK: }) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-@@ -1606,8 +1730,9 @@
- }
-
- // CHECK-LABEL: "op_reduce_precision"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_reduce_precision(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.reduce_precision_v1"(%arg0) <{
-+ // CHECK: "vhlo.reduce_precision_v1"(%[[ARG0]]) <{
- // CHECK-SAME: exponent_bits = #vhlo.integer_v1<8 : i32>
- // CHECK-SAME: mantissa_bits = #vhlo.integer_v1<10 : i32>
- // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-@@ -1619,8 +1744,9 @@
- }
-
- // CHECK-LABEL: "op_reduce_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{
-+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
- // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64>
-@@ -1644,8 +1770,9 @@
- }
-
- // CHECK-LABEL: "op_reduce_window"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x9x16x7xf32> {
-- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>,
- // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>,
- // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>,
-@@ -1671,8 +1798,9 @@
- }
-
- // CHECK-LABEL: "op_remainder"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_remainder(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.remainder_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.remainder_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.remainder"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-@@ -1692,16 +1820,18 @@
- }
-
- // CHECK-LABEL: "op_reshape"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_reshape(%arg0: tensor<16xf32>) -> tensor<4x4xf32> {
-- // CHECK: "vhlo.reshape_v1"(%arg0) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1>
-+ // CHECK: "vhlo.reshape_v1"(%[[ARG0]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1>
- %0 = "stablehlo.reshape"(%arg0) : (tensor<16xf32>) -> tensor<4x4xf32>
- func.return %0 : tensor<4x4xf32>
- }
-
- // CHECK-LABEL: "op_return"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_return(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.case_v1"(%arg0) ({
-- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> ()
-+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({
-+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.case"(%arg0) ({
- "stablehlo.return"(%arg1) : (tensor) -> ()
-@@ -1710,8 +1840,9 @@
- }
-
- // CHECK-LABEL: "op_reverse"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_reverse(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.reverse_v1"(%arg0) <{
-+ // CHECK: "vhlo.reverse_v1"(%[[ARG0]]) <{
- // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
- %0 = "stablehlo.reverse"(%arg0) {
-@@ -1721,8 +1852,9 @@
- }
-
- // CHECK-LABEL: "op_rng_bit_generator"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_rng_bit_generator(%arg0: tensor) -> (tensor, tensor) {
-- // CHECK: "vhlo.rng_bit_generator_v1"(%arg0) <{
-+ // CHECK: "vhlo.rng_bit_generator_v1"(%[[ARG0]]) <{
- // CHECK-SAME: rng_algorithm = #vhlo
- // CHECK-SAME: }> : (!vhlo.tensor_v1) -> (!vhlo.tensor_v1, !vhlo.tensor_v1)
- %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
-@@ -1732,8 +1864,9 @@
- }
-
- // CHECK-LABEL: "op_rng"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_rng(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor {
-- // CHECK: "vhlo.rng_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.rng_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: rng_distribution = #vhlo
- // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<0x!vhlo.index_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) {
-@@ -1743,29 +1876,33 @@
- }
-
- // CHECK-LABEL: "op_round_nearest_afz"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_round_nearest_afz(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.round_nearest_afz_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.round_nearest_afz_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.round_nearest_afz"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_round_nearest_even"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_round_nearest_even(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.round_nearest_even_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.round_nearest_even_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.round_nearest_even"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_rsqrt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_rsqrt(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.rsqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.rsqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.rsqrt"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> {
-- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
- // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
-@@ -1795,8 +1932,9 @@
- }
-
- // CHECK-LABEL: "op_select_and_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<12x13x13x66xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> {
-- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>,
- // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>,
- // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>>
-@@ -1826,15 +1964,17 @@
- }
-
- // CHECK-LABEL: "op_select"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_select(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor {
-- // CHECK: "vhlo.select_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.select_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.select"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_send"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME: channel_type = #vhlo.integer_v1<2 : i64>,
- // CHECK-SAME: is_host_transfer = #vhlo.bool_v1
-@@ -1847,8 +1987,9 @@
- }
-
- // CHECK-LABEL: "op_set_dimension_size"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_set_dimension_size(%arg0: tensor, %arg1: tensor) -> tensor<16xf32> {
-- // CHECK: "vhlo.set_dimension_size_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.set_dimension_size_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
- %0 = "stablehlo.set_dimension_size"(%arg0, %arg1) {
-@@ -1858,43 +1999,49 @@
- }
-
- // CHECK-LABEL: "op_shift_left"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_shift_left(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.shift_left_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.shift_left_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.shift_left"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_shift_right_arithmetic"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_shift_right_arithmetic(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.shift_right_arithmetic_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.shift_right_arithmetic_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.shift_right_arithmetic"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_shift_right_logical"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_shift_right_logical(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.shift_right_logical_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.shift_right_logical_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.shift_right_logical"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_sign"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sign(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.sign_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.sign_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.sign"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_sine"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sine(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.sine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.sine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.sine"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_slice"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_slice(%arg0: tensor<16xf32>) -> tensor<4xf32> {
-- // CHECK: "vhlo.slice_v1"(%arg0) <{
-+ // CHECK: "vhlo.slice_v1"(%[[ARG0]]) <{
- // CHECK-SAME: limit_indices = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: start_indices = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: strides = #vhlo.tensor_v1 : tensor<1xi64>>
-@@ -1908,8 +2055,9 @@
- }
-
- // CHECK-LABEL: "op_sort"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.sort_v1"(%arg0) <{
-+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{
- // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: is_stable = #vhlo.bool_v1
- // CHECK-SAME: }> ({
-@@ -1929,29 +2077,33 @@
- }
-
- // CHECK-LABEL: "op_sqrt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sqrt(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.sqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.sqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.sqrt"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_subtract"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_subtract(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.subtract_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.subtract_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.subtract"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_tanh"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_tanh(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.tanh_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.tanh_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.tanh"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_torch_index_select"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_torch_index_select(%arg0: tensor<5x1x5xf32>, %arg1: tensor<2xi32>) -> tensor<2x1x5xf32> {
-- // CHECK: "vhlo.torch_index_select_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.torch_index_select_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: batch_dims = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: dim = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<5x1x5x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.i32_v1>) -> !vhlo.tensor_v1<2x1x5x!vhlo.f32_v1>
-@@ -1963,8 +2115,9 @@
- }
-
- // CHECK-LABEL: "op_trace"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_trace(%arg0: tensor) {
-- // CHECK: "vhlo.trace_v1"(%arg0) <{
-+ // CHECK: "vhlo.trace_v1"(%[[ARG0]]) <{
- // CHECK-SAME: tag = #vhlo.string_v1<"foo">
- // CHECK-SAME: }> : (!vhlo.tensor_v1) -> ()
- "stablehlo.trace"(%arg0) {
-@@ -1974,8 +2127,9 @@
- }
-
- // CHECK-LABEL: "op_transpose"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_transpose(%arg0: tensor<16x8xf32>) -> tensor<8x16xf32> {
-- // CHECK: "vhlo.transpose_v1"(%arg0) <{
-+ // CHECK: "vhlo.transpose_v1"(%[[ARG0]]) <{
- // CHECK-SAME: permutation = #vhlo.tensor_v1 : tensor<2xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x16x!vhlo.f32_v1>
- %0 = "stablehlo.transpose"(%arg0) {
-@@ -1985,8 +2139,9 @@
- }
-
- // CHECK-LABEL: "op_triangular_solve"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_triangular_solve(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
-- // CHECK: "vhlo.triangular_solve_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.triangular_solve_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: left_side = #vhlo.bool_v1,
- // CHECK-SAME: lower = #vhlo.bool_v1,
- // CHECK-SAME: transpose_a = #vhlo,
-@@ -2002,15 +2157,17 @@
- }
-
- // CHECK-LABEL: "op_tuple"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_tuple(%arg0: tensor) -> tuple> {
-- // CHECK: "vhlo.tuple_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1>
-+ // CHECK: "vhlo.tuple_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1>
- %0 = "stablehlo.tuple"(%arg0) : (tensor) -> tuple>
- func.return %0 : tuple>
- }
-
- // CHECK-LABEL: "op_unary_einsum"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_unary_einsum(%arg0: tensor<8x16xf32>) -> tensor<8xf32> {
-- // CHECK: "vhlo.unary_einsum_v1"(%arg0) <{
-+ // CHECK: "vhlo.unary_einsum_v1"(%[[ARG0]]) <{
- // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab->a">
- // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x!vhlo.f32_v1>
- %0 = "stablehlo.unary_einsum"(%arg0) {
-@@ -2020,22 +2177,25 @@
- }
-
- // CHECK-LABEL: "op_uniform_dequantize"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_uniform_dequantize(%arg0: tensor>) -> tensor {
-- // CHECK: "vhlo.uniform_dequantize_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.uniform_dequantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.uniform_dequantize"(%arg0) : (tensor>) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_uniform_quantize"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_uniform_quantize(%arg0: tensor) -> tensor> {
-- // CHECK: "vhlo.uniform_quantize_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1>
-+ // CHECK: "vhlo.uniform_quantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1>
- %0 = "stablehlo.uniform_quantize"(%arg0) : (tensor) -> tensor>
- func.return %0 : tensor>
- }
-
- // CHECK-LABEL: "op_while"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_while(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.while_v1"(%arg0) ({
-+ // CHECK: "vhlo.while_v1"(%[[ARG0]]) ({
- // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1):
- // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }, {
-@@ -2053,8 +2213,9 @@
- }
-
- // CHECK-LABEL: "op_xor"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_xor(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.xor_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.xor_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.xor"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-@@ -2062,190 +2223,217 @@
- // ============ TYPES ============
-
- // CHECK-LABEL: "type_i1"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i1(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i4"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i4(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i8"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i8(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i16(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i32(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i64(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_ui4"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui4(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_ui8"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui8(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_ui16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui16(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_ui32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui32(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_ui64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui64(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f8E4M3FN"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E4M3FN(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f8E5M2"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E5M2(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f8E4M3FNUZ"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E4M3FNUZ(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f8E4M3B11FNUZ"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E4M3B11FNUZ(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f8E5M2FNUZ"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E5M2FNUZ(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_bf16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_bf16(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f16(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f32(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f64(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_complex_f32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_complex_f32(%arg0: tensor>, %arg1: tensor>) -> tensor> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor>
- func.return %0 : tensor>
- }
-
- // CHECK-LABEL: "type_complex_f64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_complex_f64(%arg0: tensor>, %arg1: tensor>) -> tensor> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor>
- func.return %0 : tensor>
- }
-
- // CHECK-LABEL: "type_dynamism_ranked"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_dynamism_ranked(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_quantization"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_quantization(%arg0: tensor>, %arg1: tensor>) -> tensor> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor>
- func.return %0 : tensor>
- }
-
- // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>>
- // CHECK-LABEL: "type_token_callee"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_token_callee(%arg0: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.token_v1) -> ()
-+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> ()
- return %arg0 : !stablehlo.token
- }
-
- // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>>
- // CHECK-LABEL: "type_token_caller"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_token_caller(%arg0: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.call_v1"(%arg0) <{callee = #vhlo.string_v1<"type_token_callee">}
-+ // CHECK: "vhlo.call_v1"(%[[ARG0]]) <{callee = #vhlo.string_v1<"type_token_callee">}
- // CHECK-SAME: (!vhlo.token_v1) -> !vhlo.token_v1
- %0 = func.call @type_token_callee(%arg0) : (!stablehlo.token) -> !stablehlo.token
- return %0 : !stablehlo.token
- }
-
- // CHECK-LABEL: "type_tuple"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_tuple(%arg0: tuple>) -> tuple {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo"
-diff --ruN a/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_15_0.mlir b/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_15_0.mlir
---- stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_15_0.mlir
-+++ stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_15_0.mlir
-@@ -13,6 +13,7 @@
- // ============ ATTRIBUTES ============
-
- // CHECK-LABEL: "attr_comparison_direction_eq"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_eq(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -22,6 +23,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_ne"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_ne(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -31,6 +33,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_ge"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_ge(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -40,6 +43,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_gt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_gt(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -49,6 +53,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_le"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_le(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -58,6 +63,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_lt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_lt(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -67,6 +73,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_notype"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_notype(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo
-@@ -76,6 +83,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_float"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_float(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo,
-@@ -86,6 +94,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_totalorder"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_totalorder(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo,
-@@ -96,6 +105,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_signed"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_signed(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo,
-@@ -106,6 +116,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_unsigned"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_unsigned(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo,
-@@ -118,6 +129,7 @@
- // ConvDimensionNumbers aka #stablehlo.conv is covered below.
-
- // CHECK-LABEL: "attr_custom_call_api_version_unspecified"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_custom_call_api_version_unspecified(%arg0: tensor) -> tensor {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo",
-@@ -128,6 +140,7 @@
- }
-
- // CHECK-LABEL: "attr_custom_call_api_version_original"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_custom_call_api_version_original(%arg0: tensor) -> tensor {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo",
-@@ -138,6 +151,7 @@
- }
-
- // CHECK-LABEL: "attr_custom_call_api_version_status_returning"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_custom_call_api_version_status_returning(%arg0: tensor) -> tensor {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo",
-@@ -148,6 +162,7 @@
- }
-
- // CHECK-LABEL: "attr_custom_call_api_version_status_returning_unified"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_custom_call_api_version_status_returning_unified(%arg0: tensor) -> tensor {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo",
-@@ -166,6 +181,7 @@
- // DotDimensionNumbers aka #stablehlo.dot is covered below.
-
- // CHECK-LABEL: "attr_fft_type_fft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -176,6 +192,7 @@
- }
-
- // CHECK-LABEL: "attr_fft_type_ifft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_ifft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -186,6 +203,7 @@
- }
-
- // CHECK-LABEL: "attr_fft_type_rfft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_rfft(%arg0: tensor<16xf32>) -> tensor<9xcomplex> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -196,6 +214,7 @@
- }
-
- // CHECK-LABEL: "attr_fft_type_irfft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_irfft(%arg0: tensor<9xcomplex>) -> tensor<16xf32> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -208,6 +227,7 @@
- // GatherDimensionNumbers aka #stablehlo.gather is covered below.
-
- // CHECK-LABEL: "attr_precision_config_default"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_precision_config_default(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
- %0 = "stablehlo.dot"(%arg0, %arg1) {
- // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
-@@ -216,6 +236,7 @@
- }
-
- // CHECK-LABEL: "attr_precision_config_high"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_precision_config_high(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
- %0 = "stablehlo.dot"(%arg0, %arg1) {
- // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
-@@ -225,6 +246,7 @@
- }
-
- // CHECK-LABEL: "attr_precision_config_highest"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_precision_config_highest(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
- %0 = "stablehlo.dot"(%arg0, %arg1) {
- // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
-@@ -234,6 +256,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_algorithm_default"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_rng_algorithm_default(%arg0: tensor) -> (tensor, tensor) {
- %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
- // CHECK: rng_algorithm = #vhlo
-@@ -243,6 +266,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_algorithm_three_fry"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_rng_algorithm_three_fry(%arg0: tensor) -> (tensor, tensor) {
- %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
- // CHECK: rng_algorithm = #vhlo
-@@ -252,6 +276,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_algorithm_philox"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_rng_algorithm_philox(%arg0: tensor) -> (tensor, tensor) {
- %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
- // CHECK: rng_algorithm = #vhlo
-@@ -261,6 +286,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_distribution_uniform"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @attr_rng_distribution_uniform(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor {
- %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) {
- // CHECK: rng_distribution = #vhlo
-@@ -270,6 +296,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_distribution_normal"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @attr_rng_distribution_normal(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor {
- %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) {
- // CHECK: rng_distribution = #vhlo
-@@ -281,6 +308,7 @@
- // ScatterDimensionNumbers aka #stablehlo.scatter is covered below.
-
- // CHECK-LABEL: "attr_transpose_no_transpose"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_transpose_no_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
- %0 = "stablehlo.triangular_solve"(%arg0, %arg1) {
- left_side = true,
-@@ -293,6 +321,7 @@
- }
-
- // CHECK-LABEL: "attr_transpose_transpose"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_transpose_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
- %0 = "stablehlo.triangular_solve"(%arg0, %arg1) {
- left_side = true,
-@@ -305,6 +334,7 @@
- }
-
- // CHECK-LABEL: "attr_transpose_adjoint"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_transpose_adjoint(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
- %0 = "stablehlo.triangular_solve"(%arg0, %arg1) {
- left_side = true,
-@@ -319,10 +349,9 @@
- // TypeExtensionsAttr aka #stablehlo.type_extensions is covered below.
-
- // CHECK-LABEL: "attr_type_extensions_bounds"
--func.func @attr_type_extensions_bounds(
-- %arg0: tensor>)
-- -> tensor> {
-- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1>) -> ()
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
-+func.func @attr_type_extensions_bounds(%arg0: tensor>) -> tensor> {
-+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> ()
- func.return %arg0 : tensor>
- }
-
-@@ -330,8 +359,9 @@
- // ============ DEFAULTS ============
-
- // CHECK-LABEL: "default_all_gather"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> {
-- // CHECK: "vhlo.all_gather_v1"(%arg0) <{
-+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{
- // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64>
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
-@@ -345,8 +375,9 @@
- }
-
- // CHECK-LABEL: "default_all_reduce"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_all_reduce(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.all_reduce_v1"(%arg0)
-+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]])
- // CHECK-SAME: <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
-@@ -368,8 +399,9 @@
- }
-
- // CHECK-LABEL: "default_all_to_all"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> {
-- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{
-+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>,
-@@ -386,8 +418,9 @@
- }
-
- // CHECK-LABEL: "default_cholesky"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> {
-- // CHECK: "vhlo.cholesky_v1"(%arg0) <{
-+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{
- // CHECK-SAME: lower = #vhlo.bool_v1
- // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>
- %0 = "stablehlo.cholesky"(%arg0) : (tensor<1x16x16xf32>) -> tensor<1x16x16xf32>
-@@ -395,8 +428,9 @@
- }
-
- // CHECK-LABEL: "default_collective_permute"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> {
-- // CHECK: "vhlo.collective_permute_v1"(%arg0) <{
-+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1>
-@@ -407,8 +441,9 @@
- }
-
- // CHECK-LABEL: "default_compare"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_compare(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: compare_type = #vhlo,
- // CHECK-SAME: comparison_direction = #vhlo
- // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-@@ -419,8 +454,9 @@
- }
-
- // CHECK-LABEL: "default_convolution"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x6x6x16xf32> {
-- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>,
-@@ -448,8 +484,9 @@
- }
-
- // CHECK-LABEL: "default_custom_call"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_custom_call(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.custom_call_v1"(%arg0) <{
-+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{
- // CHECK-SAME: api_version = #vhlo,
- // CHECK-SAME: backend_config = #vhlo.string_v1<"">,
- // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">,
-@@ -466,8 +503,9 @@
- }
-
- // CHECK-LABEL: "default_dot_general"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> {
-- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>,
-@@ -486,8 +524,9 @@
- }
-
- // CHECK-LABEL: "default_dot"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
-- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1>
- %0 = "stablehlo.dot"(%arg0, %arg1) : (tensor<8x16xf32>, tensor<16x8xf32>) -> tensor<8x8xf32>
-@@ -495,8 +534,9 @@
- }
-
- // CHECK-LABEL: "default_dynamic_broadcast_in_dim"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor {
-- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>,
- // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>>,
- // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>>
-@@ -508,8 +548,9 @@
- }
-
- // CHECK-LABEL: "default_dynamic_conv"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @default_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> {
-- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>,
-@@ -537,8 +578,9 @@
- }
-
- // CHECK-LABEL: "default_dynamic_gather"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @default_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> {
-- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
- // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>,
- // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
-@@ -564,15 +606,16 @@
- // CHECK-SAME: sym_name = #vhlo.string_v1<"default_func">,
- // CHECK-SAME: sym_visibility = #vhlo.string_v1<"">
- // CHECK-SAME: }> ({
-- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1):
-- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> ()
-+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1):
-+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }) : () -> ()
- func.return %arg0 : tensor
- }
-
- // CHECK-LABEL: "dynamic_gather"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> {
-- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
- // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>,
- // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
-@@ -593,8 +636,9 @@
- }
-
- // CHECK-LABEL: "default_infeed"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) {
-- // CHECK: "vhlo.infeed_v1"(%arg0) <{
-+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{
- // CHECK-SAME: infeed_config = #vhlo.string_v1<"">,
- // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[]>
- // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1)
-@@ -603,8 +647,9 @@
- }
-
- // CHECK-LABEL: "default_outfeed"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: outfeed_config = #vhlo.string_v1<"">
- // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1
- %0 = "stablehlo.outfeed"(%arg0, %arg1) : (tensor, !stablehlo.token) -> !stablehlo.token
-@@ -612,8 +657,9 @@
- }
-
- // CHECK-LABEL: "default_recv"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) {
-- // CHECK: "vhlo.recv_v1"(%arg0) <{
-+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: is_host_transfer = #vhlo.bool_v1
-@@ -625,8 +671,9 @@
- }
-
- // CHECK-LABEL: "default_send"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: is_host_transfer = #vhlo.bool_v1
-@@ -638,8 +685,9 @@
- }
-
- // CHECK-LABEL: "default_reduce_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{
-+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
- // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64>
-@@ -661,8 +709,9 @@
- }
-
- // CHECK-LABEL: "default_reduce_window"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x16x30x7xf32> {
-- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>,
- // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>,
- // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>,
-@@ -684,8 +733,9 @@
- }
-
- // CHECK-LABEL: "default_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @default_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> {
-- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
- // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
-@@ -713,8 +763,9 @@
- }
-
- // CHECK-LABEL: "default_select_and_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @default_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<10x23x23x64xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> {
-- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>,
- // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>,
- // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>>
-@@ -742,8 +793,9 @@
- }
-
- // CHECK-LABEL: "default_sort"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.sort_v1"(%arg0) <{
-+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{
- // CHECK-SAME: dimension = #vhlo.integer_v1<-1 : i64>
- // CHECK-SAME: is_stable = #vhlo.bool_v1
- // CHECK-SAME: }> ({
-@@ -762,29 +814,33 @@
- // ============ OPS ============
-
- // CHECK-LABEL: "op_abs"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func
@op_abs(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_add" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_add(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_after_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_after_all(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.after_all_v1"(%arg0) : (!vhlo.token_v1) -> !vhlo.token_v1 -+ // CHECK: "vhlo.after_all_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.after_all"(%arg0) : (!stablehlo.token) -> !stablehlo.token - func.return %0 : !stablehlo.token - } - - // CHECK-LABEL: "op_all_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.all_gather_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{ - // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64> - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -800,8 +856,9 @@ - } - - // CHECK-LABEL: "op_all_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_reduce(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.all_reduce_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: use_global_device_ids = #vhlo.bool_v1 -@@ -823,8 +880,9 @@ - } - - // CHECK-LABEL: "op_all_to_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> { -- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>, -@@ -842,22 +900,25 @@ - } - - // CHECK-LABEL: "op_and" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_and(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_atan2" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_atan2(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.atan2_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.atan2_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.atan2"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_batch_norm_grad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, 
%[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_batch_norm_grad(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16x16x16x16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { -- // CHECK: "vhlo.batch_norm_grad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ -+ // CHECK: "vhlo.batch_norm_grad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -@@ -869,8 +930,9 @@ - } - - // CHECK-LABEL: "op_batch_norm_inference" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_batch_norm_inference(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16xf32>) -> tensor<16x16x16x16xf32> { -- // CHECK: "vhlo.batch_norm_inference_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ -+ // CHECK: "vhlo.batch_norm_inference_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1> -@@ -882,8 +944,9 @@ - } - - // CHECK-LABEL: "op_batch_norm_training" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_batch_norm_training(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { -- // CHECK: "vhlo.batch_norm_training_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.batch_norm_training_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -@@ -895,15 +958,17 @@ - } - - // CHECK-LABEL: "op_bitcast_convert" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_bitcast_convert(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.bitcast_convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.bitcast_convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.bitcast_convert"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_broadcast_in_dim(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.broadcast_in_dim_v1"(%arg0) <{ -+ // CHECK: "vhlo.broadcast_in_dim_v1"(%[[ARG0]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> 
!vhlo.tensor_v1<16x16x!vhlo.f32_v1> - %0 = "stablehlo.broadcast_in_dim"(%arg0) { -@@ -913,8 +978,9 @@ - } - - // CHECK-LABEL: "op_broadcast" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_broadcast(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.broadcast_v1"(%arg0) <{ -+ // CHECK: "vhlo.broadcast_v1"(%[[ARG0]]) <{ - // CHECK-SAME: broadcast_sizes = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1> - %0 = "stablehlo.broadcast"(%arg0) { -@@ -924,9 +990,10 @@ - } - - // CHECK-LABEL: "op_case" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_case(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.case_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.case"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -935,22 +1002,25 @@ - } - - // CHECK-LABEL: "op_cbrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cbrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cbrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.cbrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cbrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_ceil" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_ceil(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.ceil_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.ceil_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.ceil"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_cholesky" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> { -- // CHECK: "vhlo.cholesky_v1"(%arg0) <{ -+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{ - // CHECK-SAME: lower = #vhlo.bool_v1 - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1> - %0 = "stablehlo.cholesky"(%arg0) { -@@ -960,22 +1030,25 @@ - } - - // CHECK-LABEL: "op_clamp" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_clamp(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.clamp_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.clamp_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.clamp"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_count_leading_zeros" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_count_leading_zeros(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.count_leading_zeros_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.count_leading_zeros_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.count_leading_zeros"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_collective_permute" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: "vhlo.collective_permute_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{ - // CHECK-SAME: 
channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -987,8 +1060,9 @@ - } - - // CHECK-LABEL: "op_compare" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_compare(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: compare_type = #vhlo, - // CHECK-SAME: comparison_direction = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -1000,15 +1074,17 @@ - } - - // CHECK-LABEL: "op_complex" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_complex(%arg0: tensor, %arg1: tensor) -> tensor> { -- // CHECK: "vhlo.complex_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.complex_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1> - %0 = "stablehlo.complex"(%arg0, %arg1) : (tensor, tensor) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "op_concatenate" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_concatenate(%arg0: tensor<8xf32>, %arg1: tensor<8xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.concatenate_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.concatenate_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x!vhlo.f32_v1>, !vhlo.tensor_v1<8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.concatenate"(%arg0, %arg1) { -@@ -1018,6 +1094,7 @@ - } - - // CHECK-LABEL: "op_constant" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_constant(%arg0: tensor) -> tensor { - // CHECK: "vhlo.constant_v1"() <{ - // CHECK-SAME: value = #vhlo.tensor_v1 : tensor> -@@ -1029,15 +1106,17 @@ - } - - // CHECK-LABEL: "op_convert" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_convert(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.convert"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_convolution" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x7x7x16xf32> { -- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -1071,8 +1150,9 @@ - } - - // CHECK-LABEL: "op_cosine" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cosine(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cosine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.cosine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cosine"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } -@@ -1085,8 +1165,9 @@ - } - - // CHECK-LABEL: "op_cross_replica_sum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cross_replica_sum(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cross-replica-sum_v1"(%arg0) <{ -+ // CHECK: "vhlo.cross-replica-sum_v1"(%[[ARG0]]) <{ - // 
CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cross-replica-sum"(%arg0) { -@@ -1096,8 +1177,9 @@ - } - - // CHECK-LABEL: "op_custom_call" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_custom_call(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.custom_call_v1"(%arg0) <{ -+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{ - // CHECK-SAME: api_version = #vhlo, - // CHECK-SAME: backend_config = #vhlo.string_v1<"\08\03\1A\02">, - // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">, -@@ -1128,15 +1210,17 @@ - } - - // CHECK-LABEL: "op_divide" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_divide(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.divide_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.divide_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.divide"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dot_general" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> { -- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, -@@ -1156,8 +1240,9 @@ - } - - // CHECK-LABEL: "op_dot" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.dot"(%arg0, %arg1) { -@@ -1167,8 +1252,9 @@ - } - - // CHECK-LABEL: "op_dynamic_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1182,8 +1268,9 @@ - } - - // CHECK-LABEL: "op_dynamic_conv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> { -- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -1217,8 +1304,9 @@ - } - - // CHECK-LABEL: "op_dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - 
func.func @op_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> { -- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -1238,8 +1326,9 @@ - } - - // CHECK-LABEL: "op_dynamic_iota" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_dynamic_iota(%arg0: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_iota_v1"(%arg0) <{ -+ // CHECK: "vhlo.dynamic_iota_v1"(%[[ARG0]]) <{ - // CHECK-SAME: iota_dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_iota"(%arg0) { -@@ -1249,22 +1338,25 @@ - } - - // CHECK-LABEL: "op_dynamic_pad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_dynamic_pad(%arg0: tensor, %arg1: tensor, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>, %arg4: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_pad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.dynamic_pad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_pad"(%arg0, %arg1, %arg2, %arg3, %arg4) : (tensor, tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dynamic_reshape" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_reshape(%arg0: tensor<16xf32>, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_reshape_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.dynamic_reshape_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_reshape"(%arg0, %arg1) : (tensor<16xf32>, tensor<2xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dynamic_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_slice(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor<4xf32> { -- // CHECK: "vhlo.dynamic_slice_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_slice_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: slice_sizes = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<4x!vhlo.f32_v1> - %0 = "stablehlo.dynamic_slice"(%arg0, %arg1) { -@@ -1274,15 +1366,17 @@ - } - - // CHECK-LABEL: "op_dynamic_update_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_update_slice(%arg0: tensor<16xf32>, %arg1: tensor<4xf32>, %arg2: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.dynamic_update_slice_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> -+ // CHECK: "vhlo.dynamic_update_slice_v1"(%[[ARG0]], 
%[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.dynamic_update_slice"(%arg0, %arg1, %arg2) : (tensor<16xf32>, tensor<4xf32>, tensor) -> tensor<16xf32> - func.return %0 : tensor<16xf32> - } - - // CHECK-LABEL: "op_einsum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_einsum(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.einsum_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.einsum_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab,bc->ac"> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.einsum"(%arg0, %arg1) { -@@ -1292,22 +1386,25 @@ - } - - // CHECK-LABEL: "op_exponential_minus_one" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_exponential_minus_one(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.exponential_minus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.exponential_minus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.exponential_minus_one"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_exponential" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_exponential(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.exponential_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.exponential_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.exponential"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_fft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { -- // CHECK: "vhlo.fft_v1"(%arg0) <{ -+ // CHECK: "vhlo.fft_v1"(%[[ARG0]]) <{ - // CHECK-SAME: fft_length = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: fft_type = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.complex_v1>) -> !vhlo.tensor_v1<16x!vhlo.complex_v1> -@@ -1319,8 +1416,9 @@ - } - - // CHECK-LABEL: "op_floor" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_floor(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.floor_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.floor_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.floor"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } -@@ -1333,16 +1431,17 @@ - // CHECK-SAME: sym_name = #vhlo.string_v1<"op_func">, - // CHECK-SAME: sym_visibility = #vhlo.string_v1<"private"> - // CHECK-SAME: }> ({ -- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1): -- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1): -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : () -> () - - func.return %arg0 : tensor - } - - // CHECK-LABEL: "op_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> { -- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -1364,8 +1463,9 @@ - } - - // CHECK-LABEL: "op_get_dimension_size" -+// CHECK-NEXT: 
(%[[ARG0:.*]]: {{.*}}) - func.func @op_get_dimension_size(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.get_dimension_size_v1"(%arg0) <{ -+ // CHECK: "vhlo.get_dimension_size_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.get_dimension_size"(%arg0) { -@@ -1375,8 +1475,9 @@ - } - - // CHECK-LABEL: "op_get_tuple_element" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_get_tuple_element(%arg0: tuple, tensor>) -> tensor { -- // CHECK: "vhlo.get_tuple_element_v1"(%arg0) <{ -+ // CHECK: "vhlo.get_tuple_element_v1"(%[[ARG0]]) <{ - // CHECK-SAME: index = #vhlo.integer_v1<0 : i32> - // CHECK-SAME: }> : (!vhlo.tuple_v1, !vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.get_tuple_element"(%arg0) { -@@ -1386,11 +1487,12 @@ - } - - // CHECK-LABEL: "op_if" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_if(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.if_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.if_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }, { -- // CHECK-NEXT: "vhlo.return_v1"(%arg2) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG2]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.if"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -1401,15 +1503,17 @@ - } - - // CHECK-LABEL: "op_imag" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_imag(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.imag_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.imag_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.imag"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_infeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.infeed_v1"(%arg0) <{ -+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{ - // CHECK-SAME: infeed_config = #vhlo.string_v1<"foo">, - // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[#vhlo.array_v1<[]>]> - // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) -@@ -1432,36 +1536,41 @@ - } - - // CHECK-LABEL: "op_is_finite" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_is_finite(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.is_finite_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.is_finite_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.is_finite"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_log" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_log(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.log_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.log_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.log"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_log_plus_one" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_log_plus_one(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.log_plus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.log_plus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.log_plus_one"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_logistic" -+// 
CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_logistic(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.logistic_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.logistic_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.logistic"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_map" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_map(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.map_v1"(%arg0) <{ -+ // CHECK: "vhlo.map_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> ({ - // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1): -@@ -1479,57 +1588,65 @@ - } - - // CHECK-LABEL: "op_maximum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_maximum(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.maximum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.maximum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.maximum"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_minimum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_minimum(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.minimum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.minimum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.minimum"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_multiply" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_multiply(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.multiply_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.multiply_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.multiply"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_negate" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_negate(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.negate_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.negate_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.negate"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_not" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_not(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.not_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.not_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.not"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_optimization_barrier" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_optimization_barrier(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.optimization_barrier_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.optimization_barrier_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.optimization_barrier"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_or" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_or(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.or_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: 
"vhlo.or_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.or"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_outfeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: outfeed_config = #vhlo.string_v1<"foo"> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.outfeed"(%arg0, %arg1) { -@@ -1539,8 +1656,9 @@ - } - - // CHECK-LABEL: "op_pad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_pad(%arg0: tensor<8xf32>, %arg1: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.pad_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.pad_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: edge_padding_high = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: edge_padding_low = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: interior_padding = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1554,36 +1672,41 @@ - } - - // CHECK-LABEL: "op_popcnt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_popcnt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.popcnt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.popcnt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.popcnt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_power" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_power(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.power_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.power_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.power"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_real_dynamic_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}) - func.func @op_real_dynamic_slice(%arg0: tensor, %arg1: tensor<1xindex>, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.real_dynamic_slice_v1"(%arg0, %arg1, %arg2, %arg3) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.real_dynamic_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.real_dynamic_slice"(%arg0, %arg1, %arg2, %arg3) : (tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_real" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_real(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.real_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.real_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.real"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_recv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.recv_v1"(%arg0) <{ -+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: 
channel_type = #vhlo.integer_v1<3 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -1596,8 +1719,9 @@ - } - - // CHECK-LABEL: "op_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_reduce(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.reduce_v1"(%arg0, %arg1) -+ // CHECK: "vhlo.reduce_v1"(%[[ARG0]], %[[ARG1]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -1612,8 +1736,9 @@ - } - - // CHECK-LABEL: "op_reduce_precision" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reduce_precision(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.reduce_precision_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_precision_v1"(%[[ARG0]]) <{ - // CHECK-SAME: exponent_bits = #vhlo.integer_v1<8 : i32> - // CHECK-SAME: mantissa_bits = #vhlo.integer_v1<10 : i32> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -1625,8 +1750,9 @@ - } - - // CHECK-LABEL: "op_reduce_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64> -@@ -1650,8 +1776,9 @@ - } - - // CHECK-LABEL: "op_reduce_window" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x9x16x7xf32> { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, -@@ -1677,8 +1804,9 @@ - } - - // CHECK-LABEL: "op_remainder" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_remainder(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.remainder_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.remainder_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.remainder"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } -@@ -1698,16 +1826,18 @@ - } - - // CHECK-LABEL: "op_reshape" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reshape(%arg0: tensor<16xf32>) -> tensor<4x4xf32> { -- // CHECK: "vhlo.reshape_v1"(%arg0) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1> -+ // CHECK: "vhlo.reshape_v1"(%[[ARG0]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1> - %0 = "stablehlo.reshape"(%arg0) : (tensor<16xf32>) -> tensor<4x4xf32> - func.return %0 : tensor<4x4xf32> - } - - // CHECK-LABEL: "op_return" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_return(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.case_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) 
-> !vhlo.tensor_v1 - %0 = "stablehlo.case"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -1716,8 +1846,9 @@ - } - - // CHECK-LABEL: "op_reverse" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reverse(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reverse_v1"(%arg0) <{ -+ // CHECK: "vhlo.reverse_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.reverse"(%arg0) { -@@ -1727,8 +1858,9 @@ - } - - // CHECK-LABEL: "op_rng_bit_generator" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_rng_bit_generator(%arg0: tensor) -> (tensor, tensor) { -- // CHECK: "vhlo.rng_bit_generator_v1"(%arg0) <{ -+ // CHECK: "vhlo.rng_bit_generator_v1"(%[[ARG0]]) <{ - // CHECK-SAME: rng_algorithm = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> (!vhlo.tensor_v1, !vhlo.tensor_v1) - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { -@@ -1738,8 +1870,9 @@ - } - - // CHECK-LABEL: "op_rng" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_rng(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { -- // CHECK: "vhlo.rng_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.rng_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: rng_distribution = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<0x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) { -@@ -1749,29 +1882,33 @@ - } - - // CHECK-LABEL: "op_round_nearest_afz" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_round_nearest_afz(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.round_nearest_afz_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.round_nearest_afz_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.round_nearest_afz"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_round_nearest_even" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_round_nearest_even(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.round_nearest_even_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.round_nearest_even_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.round_nearest_even"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_rsqrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_rsqrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.rsqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.rsqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.rsqrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> { -- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, - // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>, -@@ -1801,8 +1938,9 @@ - } - - // CHECK-LABEL: "op_select_and_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: 
tensor<12x13x13x66xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> { -- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>> -@@ -1832,15 +1970,17 @@ - } - - // CHECK-LABEL: "op_select" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_select(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.select_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.select_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.select"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_send" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -1853,8 +1993,9 @@ - } - - // CHECK-LABEL: "op_set_dimension_size" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_set_dimension_size(%arg0: tensor, %arg1: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.set_dimension_size_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.set_dimension_size_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.set_dimension_size"(%arg0, %arg1) { -@@ -1864,43 +2005,49 @@ - } - - // CHECK-LABEL: "op_shift_left" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_left(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.shift_left_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_left_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_left"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_shift_right_arithmetic" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_right_arithmetic(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.shift_right_arithmetic_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_right_arithmetic_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_right_arithmetic"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_shift_right_logical" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_right_logical(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.shift_right_logical_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_right_logical_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_right_logical"(%arg0, %arg1) : (tensor, tensor) -> 
tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_sign" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sign(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.sign_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.sign_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.sign"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_sine" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sine(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.sine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.sine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.sine"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_slice(%arg0: tensor<16xf32>) -> tensor<4xf32> { -- // CHECK: "vhlo.slice_v1"(%arg0) <{ -+ // CHECK: "vhlo.slice_v1"(%[[ARG0]]) <{ - // CHECK-SAME: limit_indices = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: start_indices = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: strides = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1914,8 +2061,9 @@ - } - - // CHECK-LABEL: "op_sort" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.sort_v1"(%arg0) <{ -+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: is_stable = #vhlo.bool_v1 - // CHECK-SAME: }> ({ -@@ -1935,29 +2083,33 @@ - } - - // CHECK-LABEL: "op_sqrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sqrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.sqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.sqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.sqrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_subtract" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_subtract(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.subtract_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.subtract_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.subtract"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_tanh" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_tanh(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.tanh_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.tanh_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.tanh"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_torch_index_select" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_torch_index_select(%arg0: tensor<5x1x5xf32>, %arg1: tensor<2xi32>) -> tensor<2x1x5xf32> { -- // CHECK: "vhlo.torch_index_select_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.torch_index_select_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_dims = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: dim = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<5x1x5x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.i32_v1>) -> !vhlo.tensor_v1<2x1x5x!vhlo.f32_v1> -@@ -1969,8 +2121,9 @@ - } - - // CHECK-LABEL: "op_trace" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_trace(%arg0: tensor) { -- // CHECK: "vhlo.trace_v1"(%arg0) <{ -+ // CHECK: "vhlo.trace_v1"(%[[ARG0]]) <{ - // CHECK-SAME: tag = 
#vhlo.string_v1<"foo"> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> () - "stablehlo.trace"(%arg0) { -@@ -1980,8 +2133,9 @@ - } - - // CHECK-LABEL: "op_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_transpose(%arg0: tensor<16x8xf32>) -> tensor<8x16xf32> { -- // CHECK: "vhlo.transpose_v1"(%arg0) <{ -+ // CHECK: "vhlo.transpose_v1"(%[[ARG0]]) <{ - // CHECK-SAME: permutation = #vhlo.tensor_v1 : tensor<2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x16x!vhlo.f32_v1> - %0 = "stablehlo.transpose"(%arg0) { -@@ -1991,8 +2145,9 @@ - } - - // CHECK-LABEL: "op_triangular_solve" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_triangular_solve(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.triangular_solve_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.triangular_solve_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: left_side = #vhlo.bool_v1, - // CHECK-SAME: lower = #vhlo.bool_v1, - // CHECK-SAME: transpose_a = #vhlo, -@@ -2008,15 +2163,17 @@ - } - - // CHECK-LABEL: "op_tuple" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_tuple(%arg0: tensor) -> tuple> { -- // CHECK: "vhlo.tuple_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1> -+ // CHECK: "vhlo.tuple_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1> - %0 = "stablehlo.tuple"(%arg0) : (tensor) -> tuple> - func.return %0 : tuple> - } - - // CHECK-LABEL: "op_unary_einsum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_unary_einsum(%arg0: tensor<8x16xf32>) -> tensor<8xf32> { -- // CHECK: "vhlo.unary_einsum_v1"(%arg0) <{ -+ // CHECK: "vhlo.unary_einsum_v1"(%[[ARG0]]) <{ - // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab->a"> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x!vhlo.f32_v1> - %0 = "stablehlo.unary_einsum"(%arg0) { -@@ -2026,22 +2183,25 @@ - } - - // CHECK-LABEL: "op_uniform_dequantize" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_uniform_dequantize(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.uniform_dequantize_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.uniform_dequantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.uniform_dequantize"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_uniform_quantize" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_uniform_quantize(%arg0: tensor) -> tensor> { -- // CHECK: "vhlo.uniform_quantize_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.uniform_quantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1> - %0 = "stablehlo.uniform_quantize"(%arg0) : (tensor) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "op_while" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_while(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.while_v1"(%arg0) ({ -+ // CHECK: "vhlo.while_v1"(%[[ARG0]]) ({ - // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1): - // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }, { -@@ -2059,8 +2219,9 @@ - } - - // CHECK-LABEL: "op_xor" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_xor(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.xor_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.xor_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.xor"(%arg0, %arg1) : (tensor, tensor) -> tensor - 
func.return %0 : tensor - } -@@ -2068,190 +2229,217 @@ - // ============ TYPES ============ - - // CHECK-LABEL: "type_i1" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i1(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i4" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i4(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i8" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i8(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i32(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i64(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui4" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui4(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui8" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui8(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: 
"vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui32(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui64(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E4M3FN" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E4M3FN(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E5M2" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E5M2(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E4M3FNUZ" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E4M3FNUZ(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E4M3B11FNUZ" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E4M3B11FNUZ(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E5M2FNUZ" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func 
@type_f8E5M2FNUZ(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_bf16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_bf16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f32(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f64(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_complex_f32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_complex_f32(%arg0: tensor>, %arg1: tensor>) -> tensor> { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "type_complex_f64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_complex_f64(%arg0: tensor>, %arg1: tensor>) -> tensor> { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "type_dynamism_ranked" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_dynamism_ranked(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: 
"type_quantization" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_quantization(%arg0: tensor>, %arg1: tensor>) -> tensor> { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor> - func.return %0 : tensor> - } - - // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>> - // CHECK-LABEL: "type_token_callee" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_token_callee(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.token_v1) -> () -+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> () - return %arg0 : !stablehlo.token - } - - // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>> - // CHECK-LABEL: "type_token_caller" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_token_caller(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.call_v1"(%arg0) <{callee = #vhlo.string_v1<"type_token_callee">} -+ // CHECK: "vhlo.call_v1"(%[[ARG0]]) <{callee = #vhlo.string_v1<"type_token_callee">} - // CHECK-SAME: (!vhlo.token_v1) -> !vhlo.token_v1 - %0 = func.call @type_token_callee(%arg0) : (!stablehlo.token) -> !stablehlo.token - return %0 : !stablehlo.token - } - - // CHECK-LABEL: "type_tuple" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_tuple(%arg0: tuple>) -> tuple { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo" -diff --ruN a/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_16_0.mlir b/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_16_0.mlir ---- stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_16_0.mlir -+++ stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_16_0.mlir -@@ -13,6 +13,7 @@ - // ============ ATTRIBUTES ============ - - // CHECK-LABEL: "attr_comparison_direction_eq" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_eq(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -22,6 +23,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_ne" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_ne(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -31,6 +33,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_ge" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_ge(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -40,6 +43,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_gt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_gt(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -49,6 +53,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_le" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_le(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -58,6 +63,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_lt" -+// CHECK-NEXT: 
(%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_lt(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -67,6 +73,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_notype" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_notype(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo -@@ -76,6 +83,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_float" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_float(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -86,6 +94,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_totalorder" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_totalorder(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -96,6 +105,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_signed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_signed(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -106,6 +116,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_unsigned" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_unsigned(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -118,6 +129,7 @@ - // ConvDimensionNumbers aka #stablehlo.conv is covered below. - - // CHECK-LABEL: "attr_custom_call_api_version_unspecified" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_unspecified(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -128,6 +140,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_original" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_original(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -138,6 +151,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_status_returning" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_status_returning(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -148,6 +162,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_status_returning_unified" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_status_returning_unified(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -166,6 +181,7 @@ - // DotDimensionNumbers aka #stablehlo.dot is covered below. 
- - // CHECK-LABEL: "attr_fft_type_fft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -176,6 +192,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_ifft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_ifft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -186,6 +203,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_rfft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_rfft(%arg0: tensor<16xf32>) -> tensor<9xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -196,6 +214,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_irfft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_irfft(%arg0: tensor<9xcomplex>) -> tensor<16xf32> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -208,6 +227,7 @@ - // GatherDimensionNumbers aka #stablehlo.gather is covered below. - - // CHECK-LABEL: "attr_precision_config_default" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_default(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -216,6 +236,7 @@ - } - - // CHECK-LABEL: "attr_precision_config_high" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_high(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -225,6 +246,7 @@ - } - - // CHECK-LABEL: "attr_precision_config_highest" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_highest(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -234,6 +256,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_default" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_default(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -243,6 +266,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_three_fry" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_three_fry(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -252,6 +276,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_philox" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_philox(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -261,6 +286,7 @@ - } - - // CHECK-LABEL: "attr_rng_distribution_uniform" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @attr_rng_distribution_uniform(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { - %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) { - // CHECK: rng_distribution = #vhlo -@@ -270,6 +296,7 @@ - } - - // CHECK-LABEL: "attr_rng_distribution_normal" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @attr_rng_distribution_normal(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { - %0 = 
"stablehlo.rng"(%arg0, %arg1, %arg2) { - // CHECK: rng_distribution = #vhlo -@@ -281,6 +308,7 @@ - // ScatterDimensionNumbers aka #stablehlo.scatter is covered below. - - // CHECK-LABEL: "attr_transpose_no_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_no_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -293,6 +321,7 @@ - } - - // CHECK-LABEL: "attr_transpose_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -305,6 +334,7 @@ - } - - // CHECK-LABEL: "attr_transpose_adjoint" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_adjoint(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -319,10 +349,9 @@ - // TypeExtensionsAttr aka #stablehlo.type_extensions is covered below. - - // CHECK-LABEL: "attr_type_extensions_bounds" --func.func @attr_type_extensions_bounds( -- %arg0: tensor>) -- -> tensor> { -- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1>) -> () -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) -+func.func @attr_type_extensions_bounds(%arg0: tensor>) -> tensor> { -+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> () - func.return %arg0 : tensor> - } - -@@ -330,8 +359,9 @@ - // ============ DEFAULTS ============ - - // CHECK-LABEL: "default_all_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.all_gather_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{ - // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64> - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -345,8 +375,9 @@ - } - - // CHECK-LABEL: "default_all_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_reduce(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.all_reduce_v1"(%arg0) -+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]]) - // CHECK-SAME: <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -368,8 +399,9 @@ - } - - // CHECK-LABEL: "default_all_to_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> { -- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>, -@@ -386,8 +418,9 @@ - } - - // CHECK-LABEL: "default_cholesky" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> { -- // CHECK: "vhlo.cholesky_v1"(%arg0) <{ -+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{ - // CHECK-SAME: lower = #vhlo.bool_v1 - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1> - %0 = "stablehlo.cholesky"(%arg0) : (tensor<1x16x16xf32>) -> tensor<1x16x16xf32> -@@ -395,8 +428,9 @@ - } - - // CHECK-LABEL: 
"default_collective_permute" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: "vhlo.collective_permute_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -407,8 +441,9 @@ - } - - // CHECK-LABEL: "default_collective_broadcast" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_collective_broadcast(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: "vhlo.collective_broadcast_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_broadcast_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -419,8 +454,9 @@ - } - - // CHECK-LABEL: "default_compare" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_compare(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: compare_type = #vhlo, - // CHECK-SAME: comparison_direction = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -431,8 +467,9 @@ - } - - // CHECK-LABEL: "default_convolution" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x6x6x16xf32> { -- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -460,8 +497,9 @@ - } - - // CHECK-LABEL: "default_custom_call" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_custom_call(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.custom_call_v1"(%arg0) <{ -+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{ - // CHECK-SAME: api_version = #vhlo, - // CHECK-SAME: backend_config = #vhlo.string_v1<"">, - // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">, -@@ -478,8 +516,9 @@ - } - - // CHECK-LABEL: "default_dot_general" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> { -- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, -@@ -498,8 +537,9 @@ - } - - // CHECK-LABEL: "default_dot" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> 
!vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.dot"(%arg0, %arg1) : (tensor<8x16xf32>, tensor<16x8xf32>) -> tensor<8x8xf32> -@@ -507,8 +547,9 @@ - } - - // CHECK-LABEL: "default_dynamic_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>>, - // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>> -@@ -520,8 +561,9 @@ - } - - // CHECK-LABEL: "default_dynamic_conv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> { -- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -549,8 +591,9 @@ - } - - // CHECK-LABEL: "default_dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> { -- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -576,15 +619,16 @@ - // CHECK-SAME: sym_name = #vhlo.string_v1<"default_func">, - // CHECK-SAME: sym_visibility = #vhlo.string_v1<""> - // CHECK-SAME: }> ({ -- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1): -- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1): -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : () -> () - func.return %arg0 : tensor - } - - // CHECK-LABEL: "dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> { -- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -605,8 +649,9 @@ - } - - // CHECK-LABEL: "default_infeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.infeed_v1"(%arg0) <{ -+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{ - // CHECK-SAME: infeed_config = #vhlo.string_v1<"">, - // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[]> - // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) -@@ -615,8 +660,9 @@ - } - - // CHECK-LABEL: "default_outfeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func 
@default_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: outfeed_config = #vhlo.string_v1<""> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.outfeed"(%arg0, %arg1) : (tensor, !stablehlo.token) -> !stablehlo.token -@@ -624,8 +670,9 @@ - } - - // CHECK-LABEL: "default_recv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.recv_v1"(%arg0) <{ -+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -637,8 +684,9 @@ - } - - // CHECK-LABEL: "default_send" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -650,8 +698,9 @@ - } - - // CHECK-LABEL: "default_reduce_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64> -@@ -673,8 +722,9 @@ - } - - // CHECK-LABEL: "default_reduce_window" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x16x30x7xf32> { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, -@@ -696,8 +746,9 @@ - } - - // CHECK-LABEL: "default_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> { -- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, - // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>, -@@ -725,8 +776,9 @@ - } - - // CHECK-LABEL: "default_select_and_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<10x23x23x64xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> { -- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>, - // 
CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>> -@@ -754,8 +806,9 @@ - } - - // CHECK-LABEL: "default_sort" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.sort_v1"(%arg0) <{ -+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<-1 : i64> - // CHECK-SAME: is_stable = #vhlo.bool_v1 - // CHECK-SAME: }> ({ -@@ -774,29 +827,33 @@ - // ============ OPS ============ - - // CHECK-LABEL: "op_abs" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_abs(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_add" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_add(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_after_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_after_all(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.after_all_v1"(%arg0) : (!vhlo.token_v1) -> !vhlo.token_v1 -+ // CHECK: "vhlo.after_all_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.after_all"(%arg0) : (!stablehlo.token) -> !stablehlo.token - func.return %0 : !stablehlo.token - } - - // CHECK-LABEL: "op_all_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.all_gather_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{ - // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64> - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -812,8 +869,9 @@ - } - - // CHECK-LABEL: "op_all_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_reduce(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.all_reduce_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: use_global_device_ids = #vhlo.bool_v1 -@@ -835,8 +893,9 @@ - } - - // CHECK-LABEL: "op_all_to_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> { -- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>, -@@ -854,22 +913,25 @@ - } - - // CHECK-LABEL: "op_and" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_and(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: 
"op_atan2" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_atan2(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.atan2_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.atan2_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.atan2"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_batch_norm_grad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_batch_norm_grad(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16x16x16x16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { -- // CHECK: "vhlo.batch_norm_grad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ -+ // CHECK: "vhlo.batch_norm_grad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -@@ -881,8 +943,9 @@ - } - - // CHECK-LABEL: "op_batch_norm_inference" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_batch_norm_inference(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16xf32>) -> tensor<16x16x16x16xf32> { -- // CHECK: "vhlo.batch_norm_inference_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ -+ // CHECK: "vhlo.batch_norm_inference_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1> -@@ -894,8 +957,9 @@ - } - - // CHECK-LABEL: "op_batch_norm_training" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_batch_norm_training(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { -- // CHECK: "vhlo.batch_norm_training_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.batch_norm_training_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -@@ -907,15 +971,17 @@ - } - - // CHECK-LABEL: "op_bitcast_convert" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_bitcast_convert(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.bitcast_convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: 
"vhlo.bitcast_convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.bitcast_convert"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_broadcast_in_dim(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.broadcast_in_dim_v1"(%arg0) <{ -+ // CHECK: "vhlo.broadcast_in_dim_v1"(%[[ARG0]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1> - %0 = "stablehlo.broadcast_in_dim"(%arg0) { -@@ -925,8 +991,9 @@ - } - - // CHECK-LABEL: "op_broadcast" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_broadcast(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.broadcast_v1"(%arg0) <{ -+ // CHECK: "vhlo.broadcast_v1"(%[[ARG0]]) <{ - // CHECK-SAME: broadcast_sizes = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1> - %0 = "stablehlo.broadcast"(%arg0) { -@@ -936,9 +1003,10 @@ - } - - // CHECK-LABEL: "op_case" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_case(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.case_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.case"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -947,22 +1015,25 @@ - } - - // CHECK-LABEL: "op_cbrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cbrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cbrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.cbrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cbrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_ceil" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_ceil(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.ceil_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.ceil_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.ceil"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_cholesky" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> { -- // CHECK: "vhlo.cholesky_v1"(%arg0) <{ -+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{ - // CHECK-SAME: lower = #vhlo.bool_v1 - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1> - %0 = "stablehlo.cholesky"(%arg0) { -@@ -972,22 +1043,25 @@ - } - - // CHECK-LABEL: "op_clamp" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_clamp(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.clamp_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.clamp_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.clamp"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_count_leading_zeros" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_count_leading_zeros(%arg0: tensor) -> tensor { -- // 
CHECK: "vhlo.count_leading_zeros_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.count_leading_zeros_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.count_leading_zeros"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_collective_permute" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: "vhlo.collective_permute_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -999,8 +1073,9 @@ - } - - // CHECK-LABEL: "op_compare" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_compare(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: compare_type = #vhlo, - // CHECK-SAME: comparison_direction = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -1012,15 +1087,17 @@ - } - - // CHECK-LABEL: "op_complex" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_complex(%arg0: tensor, %arg1: tensor) -> tensor> { -- // CHECK: "vhlo.complex_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.complex_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1> - %0 = "stablehlo.complex"(%arg0, %arg1) : (tensor, tensor) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "op_concatenate" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_concatenate(%arg0: tensor<8xf32>, %arg1: tensor<8xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.concatenate_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.concatenate_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x!vhlo.f32_v1>, !vhlo.tensor_v1<8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.concatenate"(%arg0, %arg1) { -@@ -1030,6 +1107,7 @@ - } - - // CHECK-LABEL: "op_constant" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_constant(%arg0: tensor) -> tensor { - // CHECK: "vhlo.constant_v1"() <{ - // CHECK-SAME: value = #vhlo.tensor_v1 : tensor> -@@ -1041,15 +1119,17 @@ - } - - // CHECK-LABEL: "op_convert" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_convert(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.convert"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_convolution" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x7x7x16xf32> { -- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -1083,8 +1163,9 @@ - } - - // CHECK-LABEL: "op_cosine" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func 
@op_cosine(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cosine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.cosine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cosine"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } -@@ -1097,8 +1178,9 @@ - } - - // CHECK-LABEL: "op_cross_replica_sum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cross_replica_sum(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cross-replica-sum_v1"(%arg0) <{ -+ // CHECK: "vhlo.cross-replica-sum_v1"(%[[ARG0]]) <{ - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cross-replica-sum"(%arg0) { -@@ -1108,8 +1190,9 @@ - } - - // CHECK-LABEL: "op_custom_call" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_custom_call(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.custom_call_v1"(%arg0) <{ -+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{ - // CHECK-SAME: api_version = #vhlo, - // CHECK-SAME: backend_config = #vhlo.string_v1<"\08\03\1A\02">, - // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">, -@@ -1140,15 +1223,17 @@ - } - - // CHECK-LABEL: "op_divide" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_divide(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.divide_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.divide_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.divide"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dot_general" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> { -- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, -@@ -1168,8 +1253,9 @@ - } - - // CHECK-LABEL: "op_dot" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.dot"(%arg0, %arg1) { -@@ -1179,8 +1265,9 @@ - } - - // CHECK-LABEL: "op_dynamic_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1194,8 +1281,9 @@ - } - - // CHECK-LABEL: "op_dynamic_conv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: 
tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> { -- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -1229,8 +1317,9 @@ - } - - // CHECK-LABEL: "op_dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> { -- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -1250,8 +1339,9 @@ - } - - // CHECK-LABEL: "op_dynamic_iota" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_dynamic_iota(%arg0: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_iota_v1"(%arg0) <{ -+ // CHECK: "vhlo.dynamic_iota_v1"(%[[ARG0]]) <{ - // CHECK-SAME: iota_dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_iota"(%arg0) { -@@ -1261,22 +1351,25 @@ - } - - // CHECK-LABEL: "op_dynamic_pad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_dynamic_pad(%arg0: tensor, %arg1: tensor, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>, %arg4: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_pad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.dynamic_pad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_pad"(%arg0, %arg1, %arg2, %arg3, %arg4) : (tensor, tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dynamic_reshape" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_reshape(%arg0: tensor<16xf32>, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_reshape_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.dynamic_reshape_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_reshape"(%arg0, %arg1) : (tensor<16xf32>, tensor<2xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dynamic_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_slice(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor<4xf32> { -- // CHECK: "vhlo.dynamic_slice_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_slice_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: slice_sizes = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<4x!vhlo.f32_v1> - %0 = 
"stablehlo.dynamic_slice"(%arg0, %arg1) { -@@ -1286,15 +1379,17 @@ - } - - // CHECK-LABEL: "op_dynamic_update_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_update_slice(%arg0: tensor<16xf32>, %arg1: tensor<4xf32>, %arg2: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.dynamic_update_slice_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> -+ // CHECK: "vhlo.dynamic_update_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.dynamic_update_slice"(%arg0, %arg1, %arg2) : (tensor<16xf32>, tensor<4xf32>, tensor) -> tensor<16xf32> - func.return %0 : tensor<16xf32> - } - - // CHECK-LABEL: "op_einsum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_einsum(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.einsum_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.einsum_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab,bc->ac"> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.einsum"(%arg0, %arg1) { -@@ -1304,22 +1399,25 @@ - } - - // CHECK-LABEL: "op_exponential_minus_one" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_exponential_minus_one(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.exponential_minus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.exponential_minus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.exponential_minus_one"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_exponential" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_exponential(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.exponential_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.exponential_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.exponential"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_fft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { -- // CHECK: "vhlo.fft_v1"(%arg0) <{ -+ // CHECK: "vhlo.fft_v1"(%[[ARG0]]) <{ - // CHECK-SAME: fft_length = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: fft_type = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.complex_v1>) -> !vhlo.tensor_v1<16x!vhlo.complex_v1> -@@ -1331,8 +1429,9 @@ - } - - // CHECK-LABEL: "op_floor" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_floor(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.floor_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.floor_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.floor"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } -@@ -1345,16 +1444,17 @@ - // CHECK-SAME: sym_name = #vhlo.string_v1<"op_func">, - // CHECK-SAME: sym_visibility = #vhlo.string_v1<"private"> - // CHECK-SAME: }> ({ -- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1): -- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1): -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : () -> () - - func.return %arg0 : tensor - } - - // CHECK-LABEL: 
"op_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> { -- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -1376,8 +1476,9 @@ - } - - // CHECK-LABEL: "op_get_dimension_size" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_get_dimension_size(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.get_dimension_size_v1"(%arg0) <{ -+ // CHECK: "vhlo.get_dimension_size_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.get_dimension_size"(%arg0) { -@@ -1387,8 +1488,9 @@ - } - - // CHECK-LABEL: "op_get_tuple_element" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_get_tuple_element(%arg0: tuple, tensor>) -> tensor { -- // CHECK: "vhlo.get_tuple_element_v1"(%arg0) <{ -+ // CHECK: "vhlo.get_tuple_element_v1"(%[[ARG0]]) <{ - // CHECK-SAME: index = #vhlo.integer_v1<0 : i32> - // CHECK-SAME: }> : (!vhlo.tuple_v1, !vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.get_tuple_element"(%arg0) { -@@ -1398,11 +1500,12 @@ - } - - // CHECK-LABEL: "op_if" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_if(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.if_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.if_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }, { -- // CHECK-NEXT: "vhlo.return_v1"(%arg2) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG2]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.if"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -1413,15 +1516,17 @@ - } - - // CHECK-LABEL: "op_imag" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_imag(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.imag_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.imag_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.imag"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_infeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.infeed_v1"(%arg0) <{ -+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{ - // CHECK-SAME: infeed_config = #vhlo.string_v1<"foo">, - // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[#vhlo.array_v1<[]>]> - // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) -@@ -1444,36 +1549,41 @@ - } - - // CHECK-LABEL: "op_is_finite" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_is_finite(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.is_finite_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.is_finite_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.is_finite"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_log" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_log(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.log_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: 
"vhlo.log_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.log"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_log_plus_one" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_log_plus_one(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.log_plus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.log_plus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.log_plus_one"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_logistic" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_logistic(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.logistic_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.logistic_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.logistic"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_map" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_map(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.map_v1"(%arg0) <{ -+ // CHECK: "vhlo.map_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> ({ - // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1): -@@ -1491,57 +1601,65 @@ - } - - // CHECK-LABEL: "op_maximum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_maximum(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.maximum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.maximum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.maximum"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_minimum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_minimum(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.minimum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.minimum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.minimum"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_multiply" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_multiply(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.multiply_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.multiply_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.multiply"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_negate" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_negate(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.negate_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.negate_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.negate"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_not" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_not(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.not_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.not_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.not"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_optimization_barrier" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func 
- func.func @op_optimization_barrier(%arg0: tensor) -> tensor {
--  // CHECK: "vhlo.optimization_barrier_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+  // CHECK: "vhlo.optimization_barrier_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-   %0 = "stablehlo.optimization_barrier"(%arg0) : (tensor) -> tensor
-   func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_or"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_or(%arg0: tensor, %arg1: tensor) -> tensor {
--  // CHECK: "vhlo.or_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+  // CHECK: "vhlo.or_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-   %0 = "stablehlo.or"(%arg0, %arg1) : (tensor, tensor) -> tensor
-   func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_outfeed"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token {
--  // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{
-+  // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{
-   // CHECK-SAME: outfeed_config = #vhlo.string_v1<"foo">
-   // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1
-   %0 = "stablehlo.outfeed"(%arg0, %arg1) {
-@@ -1551,8 +1669,9 @@
- }
-
- // CHECK-LABEL: "op_pad"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_pad(%arg0: tensor<8xf32>, %arg1: tensor) -> tensor<16xf32> {
--  // CHECK: "vhlo.pad_v1"(%arg0, %arg1) <{
-+  // CHECK: "vhlo.pad_v1"(%[[ARG0]], %[[ARG1]]) <{
-   // CHECK-SAME: edge_padding_high = #vhlo.tensor_v1 : tensor<1xi64>>,
-   // CHECK-SAME: edge_padding_low = #vhlo.tensor_v1 : tensor<1xi64>>,
-   // CHECK-SAME: interior_padding = #vhlo.tensor_v1 : tensor<1xi64>>
-@@ -1566,36 +1685,41 @@
- }
-
- // CHECK-LABEL: "op_popcnt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_popcnt(%arg0: tensor) -> tensor {
--  // CHECK: "vhlo.popcnt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+  // CHECK: "vhlo.popcnt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-   %0 = "stablehlo.popcnt"(%arg0) : (tensor) -> tensor
-   func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_power"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_power(%arg0: tensor, %arg1: tensor) -> tensor {
--  // CHECK: "vhlo.power_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+  // CHECK: "vhlo.power_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-   %0 = "stablehlo.power"(%arg0, %arg1) : (tensor, tensor) -> tensor
-   func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_real_dynamic_slice"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}})
- func.func @op_real_dynamic_slice(%arg0: tensor, %arg1: tensor<1xindex>, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>) -> tensor {
--  // CHECK: "vhlo.real_dynamic_slice_v1"(%arg0, %arg1, %arg2, %arg3) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1
-+  // CHECK: "vhlo.real_dynamic_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1
-   %0 = "stablehlo.real_dynamic_slice"(%arg0, %arg1, %arg2, %arg3) : (tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor
-   func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_real"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_real(%arg0: tensor>) -> tensor {
--  // CHECK: "vhlo.real_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
-+  // CHECK: "vhlo.real_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
-   %0 = "stablehlo.real"(%arg0) : (tensor>) -> tensor
-   func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_recv"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) {
--  // CHECK: "vhlo.recv_v1"(%arg0) <{
-+  // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{
-   // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
-   // CHECK-SAME: channel_type = #vhlo.integer_v1<3 : i64>,
-   // CHECK-SAME: is_host_transfer = #vhlo.bool_v1
-@@ -1608,8 +1732,9 @@
- }
-
- // CHECK-LABEL: "op_reduce"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_reduce(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor {
--  // CHECK: "vhlo.reduce_v1"(%arg0, %arg1)
-+  // CHECK: "vhlo.reduce_v1"(%[[ARG0]], %[[ARG1]])
-   // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1):
-   // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> ()
-   // CHECK: }) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-@@ -1624,8 +1749,9 @@
- }
-
- // CHECK-LABEL: "op_reduce_precision"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_reduce_precision(%arg0: tensor) -> tensor {
--  // CHECK: "vhlo.reduce_precision_v1"(%arg0) <{
-+  // CHECK: "vhlo.reduce_precision_v1"(%[[ARG0]]) <{
-   // CHECK-SAME: exponent_bits = #vhlo.integer_v1<8 : i32>
-   // CHECK-SAME: mantissa_bits = #vhlo.integer_v1<10 : i32>
-   // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-@@ -1637,8 +1763,9 @@
- }
-
- // CHECK-LABEL: "op_reduce_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> {
--  // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{
-+  // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{
-   // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>,
-   // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
-   // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64>
-@@ -1662,8 +1789,9 @@
- }
-
- // CHECK-LABEL: "op_reduce_window"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x9x16x7xf32> {
--  // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{
-+  // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{
-   // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>,
-   // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>,
-   // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>,
-@@ -1689,8 +1817,9 @@
- }
-
- // CHECK-LABEL: "op_remainder"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_remainder(%arg0: tensor, %arg1: tensor) -> tensor {
--  // CHECK: "vhlo.remainder_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+  // CHECK: "vhlo.remainder_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-   %0 = "stablehlo.remainder"(%arg0, %arg1) : (tensor, tensor) -> tensor
-   func.return %0 : tensor
- }
-@@ -1710,16 +1839,18 @@
- }
-
- // CHECK-LABEL: "op_reshape"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_reshape(%arg0: tensor<16xf32>) -> tensor<4x4xf32> {
--  // CHECK: "vhlo.reshape_v1"(%arg0) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1>
-+  // CHECK: "vhlo.reshape_v1"(%[[ARG0]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1>
-   %0 = "stablehlo.reshape"(%arg0) : (tensor<16xf32>) -> tensor<4x4xf32>
-   func.return %0 : tensor<4x4xf32>
- }
-
- // CHECK-LABEL: "op_return"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_return(%arg0: tensor, %arg1: tensor) -> tensor {
--  // CHECK: "vhlo.case_v1"(%arg0) ({
--  // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> ()
-+  // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({
-+  // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> ()
-   // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-   %0 = "stablehlo.case"(%arg0) ({
-     "stablehlo.return"(%arg1) : (tensor) -> ()
-@@ -1728,8 +1859,9 @@
- }
-
- // CHECK-LABEL: "op_reverse"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_reverse(%arg0: tensor<16xf32>) -> tensor<16xf32> {
--  // CHECK: "vhlo.reverse_v1"(%arg0) <{
-+  // CHECK: "vhlo.reverse_v1"(%[[ARG0]]) <{
-   // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>>
-   // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
-   %0 = "stablehlo.reverse"(%arg0) {
-@@ -1739,8 +1871,9 @@
- }
-
- // CHECK-LABEL: "op_rng_bit_generator"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_rng_bit_generator(%arg0: tensor) -> (tensor, tensor) {
--  // CHECK: "vhlo.rng_bit_generator_v1"(%arg0) <{
-+  // CHECK: "vhlo.rng_bit_generator_v1"(%[[ARG0]]) <{
-   // CHECK-SAME: rng_algorithm = #vhlo
-   // CHECK-SAME: }> : (!vhlo.tensor_v1) -> (!vhlo.tensor_v1, !vhlo.tensor_v1)
-   %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
-@@ -1750,8 +1883,9 @@
- }
-
- // CHECK-LABEL: "op_rng"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_rng(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor {
--  // CHECK: "vhlo.rng_v1"(%arg0, %arg1, %arg2) <{
-+  // CHECK: "vhlo.rng_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
-   // CHECK-SAME: rng_distribution = #vhlo
-   // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<0x!vhlo.index_v1>) -> !vhlo.tensor_v1
-   %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) {
-@@ -1761,29 +1895,33 @@
- }
-
- // CHECK-LABEL: "op_round_nearest_afz"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_round_nearest_afz(%arg0: tensor) -> tensor {
--  // CHECK: "vhlo.round_nearest_afz_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+  // CHECK: "vhlo.round_nearest_afz_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-   %0 = "stablehlo.round_nearest_afz"(%arg0) : (tensor) -> tensor
-   func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_round_nearest_even"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_round_nearest_even(%arg0: tensor) -> tensor {
--  // CHECK: "vhlo.round_nearest_even_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+  // CHECK: "vhlo.round_nearest_even_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-   %0 = "stablehlo.round_nearest_even"(%arg0) : (tensor) -> tensor
-   func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_rsqrt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_rsqrt(%arg0: tensor) -> tensor {
--  // CHECK: "vhlo.rsqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+  // CHECK: "vhlo.rsqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-   %0 = "stablehlo.rsqrt"(%arg0) : (tensor) -> tensor
-   func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> {
--  // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{
-+  // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
-   // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>,
-   // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
-   // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
-@@ -1813,8 +1951,9 @@
- }
-
- // CHECK-LABEL: "op_select_and_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<12x13x13x66xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> {
--  // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{
-+  // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
-   // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>,
-   // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>,
-   // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>>
-@@ -1844,15 +1983,17 @@
- }
-
- // CHECK-LABEL: "op_select"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_select(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor {
--  // CHECK: "vhlo.select_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+  // CHECK: "vhlo.select_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-   %0 = "stablehlo.select"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor
-   func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_send"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token {
--  // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{
-+  // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{
-   // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
-   // CHECK-SAME: channel_type = #vhlo.integer_v1<2 : i64>,
-   // CHECK-SAME: is_host_transfer = #vhlo.bool_v1
-@@ -1865,8 +2006,9 @@
- }
-
- // CHECK-LABEL: "op_set_dimension_size"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_set_dimension_size(%arg0: tensor, %arg1: tensor) -> tensor<16xf32> {
--  // CHECK: "vhlo.set_dimension_size_v1"(%arg0, %arg1) <{
-+  // CHECK: "vhlo.set_dimension_size_v1"(%[[ARG0]], %[[ARG1]]) <{
-   // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64>
-   // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
-   %0 = "stablehlo.set_dimension_size"(%arg0, %arg1) {
-@@ -1876,43 +2018,49 @@
- }
-
- // CHECK-LABEL: "op_shift_left"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_shift_left(%arg0: tensor, %arg1: tensor) -> tensor {
--  // CHECK: "vhlo.shift_left_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+  // CHECK: "vhlo.shift_left_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-   %0 = "stablehlo.shift_left"(%arg0, %arg1) : (tensor, tensor) -> tensor
-   func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_shift_right_arithmetic"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_shift_right_arithmetic(%arg0: tensor, %arg1: tensor) -> tensor {
--  // CHECK: "vhlo.shift_right_arithmetic_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+  // CHECK: "vhlo.shift_right_arithmetic_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-   %0 = "stablehlo.shift_right_arithmetic"(%arg0, %arg1) : (tensor, tensor) -> tensor
-   func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_shift_right_logical"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_shift_right_logical(%arg0: tensor, %arg1: tensor) -> tensor {
--  // CHECK: "vhlo.shift_right_logical_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+  // CHECK: "vhlo.shift_right_logical_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-   %0 = "stablehlo.shift_right_logical"(%arg0, %arg1) : (tensor, tensor) -> tensor
-   func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_sign"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sign(%arg0: tensor) -> tensor {
--  // CHECK: "vhlo.sign_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+  // CHECK: "vhlo.sign_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-   %0 = "stablehlo.sign"(%arg0) : (tensor) -> tensor
-   func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_sine"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sine(%arg0: tensor) -> tensor {
--  // CHECK: "vhlo.sine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+  // CHECK: "vhlo.sine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-   %0 = "stablehlo.sine"(%arg0) : (tensor) -> tensor
-   func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_slice"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_slice(%arg0: tensor<16xf32>) -> tensor<4xf32> {
--  // CHECK: "vhlo.slice_v1"(%arg0) <{
-+  // CHECK: "vhlo.slice_v1"(%[[ARG0]]) <{
-   // CHECK-SAME: limit_indices = #vhlo.tensor_v1 : tensor<1xi64>>,
-   // CHECK-SAME: start_indices = #vhlo.tensor_v1 : tensor<1xi64>>,
-   // CHECK-SAME: strides = #vhlo.tensor_v1 : tensor<1xi64>>
-@@ -1926,8 +2074,9 @@
- }
-
- // CHECK-LABEL: "op_sort"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> {
--  // CHECK: "vhlo.sort_v1"(%arg0) <{
-+  // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{
-   // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64>
-   // CHECK-SAME: is_stable = #vhlo.bool_v1
-   // CHECK-SAME: }> ({
-@@ -1947,29 +2096,33 @@
- }
-
- // CHECK-LABEL: "op_sqrt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sqrt(%arg0: tensor) -> tensor {
--  // CHECK: "vhlo.sqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+  // CHECK: "vhlo.sqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-   %0 = "stablehlo.sqrt"(%arg0) : (tensor) -> tensor
-   func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_subtract"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_subtract(%arg0: tensor, %arg1: tensor) -> tensor {
--  // CHECK: "vhlo.subtract_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+  // CHECK: "vhlo.subtract_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-   %0 = "stablehlo.subtract"(%arg0, %arg1) : (tensor, tensor) -> tensor
-   func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_tanh"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_tanh(%arg0: tensor) -> tensor {
--  // CHECK: "vhlo.tanh_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+  // CHECK: "vhlo.tanh_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-   %0 = "stablehlo.tanh"(%arg0) : (tensor) -> tensor
-   func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_torch_index_select"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_torch_index_select(%arg0: tensor<5x1x5xf32>, %arg1: tensor<2xi32>) -> tensor<2x1x5xf32> {
--  // CHECK: "vhlo.torch_index_select_v1"(%arg0, %arg1) <{
-+  // CHECK: "vhlo.torch_index_select_v1"(%[[ARG0]], %[[ARG1]]) <{
-   // CHECK-SAME: batch_dims = #vhlo.integer_v1<0 : i64>
-   // CHECK-SAME: dim = #vhlo.integer_v1<0 : i64>
-   // CHECK-SAME: }> : (!vhlo.tensor_v1<5x1x5x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.i32_v1>) -> !vhlo.tensor_v1<2x1x5x!vhlo.f32_v1>
-@@ -1981,8 +2134,9 @@
- }
-
- // CHECK-LABEL: "op_trace"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_trace(%arg0: tensor) {
--  // CHECK: "vhlo.trace_v1"(%arg0) <{
-+  // CHECK: "vhlo.trace_v1"(%[[ARG0]]) <{
-   // CHECK-SAME: tag = #vhlo.string_v1<"foo">
-   // CHECK-SAME: }> : (!vhlo.tensor_v1) -> ()
-   "stablehlo.trace"(%arg0) {
-@@ -1992,8 +2146,9 @@
- }
-
- // CHECK-LABEL: "op_transpose"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_transpose(%arg0: tensor<16x8xf32>) -> tensor<8x16xf32> {
--  // CHECK: "vhlo.transpose_v1"(%arg0) <{
-+  // CHECK: "vhlo.transpose_v1"(%[[ARG0]]) <{
-   // CHECK-SAME: permutation = #vhlo.tensor_v1 : tensor<2xi64>>
-   // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x16x!vhlo.f32_v1>
-   %0 = "stablehlo.transpose"(%arg0) {
-@@ -2003,8 +2158,9 @@
- }
-
- // CHECK-LABEL: "op_triangular_solve"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_triangular_solve(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
--  // CHECK: "vhlo.triangular_solve_v1"(%arg0, %arg1) <{
-+  // CHECK: "vhlo.triangular_solve_v1"(%[[ARG0]], %[[ARG1]]) <{
-   // CHECK-SAME: left_side = #vhlo.bool_v1,
-   // CHECK-SAME: lower = #vhlo.bool_v1,
-   // CHECK-SAME: transpose_a = #vhlo,
-@@ -2020,15 +2176,17 @@
- }
-
- // CHECK-LABEL: "op_tuple"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_tuple(%arg0: tensor) -> tuple> {
--  // CHECK: "vhlo.tuple_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1>
-+  // CHECK: "vhlo.tuple_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1>
-   %0 = "stablehlo.tuple"(%arg0) : (tensor) -> tuple>
-   func.return %0 : tuple>
- }
-
- // CHECK-LABEL: "op_unary_einsum"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_unary_einsum(%arg0: tensor<8x16xf32>) -> tensor<8xf32> {
--  // CHECK: "vhlo.unary_einsum_v1"(%arg0) <{
-+  // CHECK: "vhlo.unary_einsum_v1"(%[[ARG0]]) <{
-   // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab->a">
-   // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x!vhlo.f32_v1>
-   %0 = "stablehlo.unary_einsum"(%arg0) {
-@@ -2038,22 +2196,25 @@
- }
-
- // CHECK-LABEL: "op_uniform_dequantize"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_uniform_dequantize(%arg0: tensor>) -> tensor {
--  // CHECK: "vhlo.uniform_dequantize_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
-+  // CHECK: "vhlo.uniform_dequantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
-   %0 = "stablehlo.uniform_dequantize"(%arg0) : (tensor>) -> tensor
-   func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_uniform_quantize"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_uniform_quantize(%arg0: tensor) -> tensor> {
--  // CHECK: "vhlo.uniform_quantize_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1>
-+  // CHECK: "vhlo.uniform_quantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1>
-   %0 = "stablehlo.uniform_quantize"(%arg0) : (tensor) -> tensor>
-   func.return %0 : tensor>
- }
-
- // CHECK-LABEL: "op_while"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_while(%arg0: tensor) -> tensor {
--  // CHECK: "vhlo.while_v1"(%arg0) ({
-+  // CHECK: "vhlo.while_v1"(%[[ARG0]]) ({
"vhlo.while_v1"(%[[ARG0]]) ({ - // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1): - // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }, { -@@ -2071,8 +2232,9 @@ - } - - // CHECK-LABEL: "op_xor" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_xor(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.xor_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.xor_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.xor"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } -@@ -2080,190 +2242,217 @@ - // ============ TYPES ============ - - // CHECK-LABEL: "type_i1" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i1(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i4" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i4(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i8" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i8(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i32(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i64(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui4" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - 
- func.func @type_ui4(%arg0: tensor, %arg1: tensor) -> tensor {
--  // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+  // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-   %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
-   func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_ui8"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui8(%arg0: tensor, %arg1: tensor) -> tensor {
--  // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+  // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-   %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
-   func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_ui16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui16(%arg0: tensor, %arg1: tensor) -> tensor {
--  // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+  // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-   %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
-   func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_ui32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui32(%arg0: tensor, %arg1: tensor) -> tensor {
--  // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+  // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-   %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
-   func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_ui64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui64(%arg0: tensor, %arg1: tensor) -> tensor {
--  // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+  // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-   %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
-   func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f8E4M3FN"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E4M3FN(%arg0: tensor, %arg1: tensor) -> tensor {
--  // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+  // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-   %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
-   func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f8E5M2"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E5M2(%arg0: tensor, %arg1: tensor) -> tensor {
--  // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+  // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-   %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
-   func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f8E4M3FNUZ"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E4M3FNUZ(%arg0: tensor, %arg1: tensor) -> tensor {
--  // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+  // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-   %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
-   func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f8E4M3B11FNUZ"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E4M3B11FNUZ(%arg0: tensor, %arg1: tensor) -> tensor {
--  // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+  // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-   %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
-   func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f8E5M2FNUZ"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E5M2FNUZ(%arg0: tensor, %arg1: tensor) -> tensor {
--  // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+  // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-   %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
-   func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_bf16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_bf16(%arg0: tensor, %arg1: tensor) -> tensor {
--  // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+  // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-   %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
-   func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f16(%arg0: tensor, %arg1: tensor) -> tensor {
--  // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+  // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-   %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
-   func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f32(%arg0: tensor, %arg1: tensor) -> tensor {
--  // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+  // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-   %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
-   func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f64(%arg0: tensor, %arg1: tensor) -> tensor {
--  // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+  // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-   %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
-   func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_complex_f32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_complex_f32(%arg0: tensor>, %arg1: tensor>) -> tensor> {
--  // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
-+  // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
-   %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor>
-   func.return %0 : tensor>
- }
-
- // CHECK-LABEL: "type_complex_f64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_complex_f64(%arg0: tensor>, %arg1: tensor>) -> tensor> {
--  // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "type_dynamism_ranked" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_dynamism_ranked(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_quantization" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_quantization(%arg0: tensor>, %arg1: tensor>) -> tensor> { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor> - func.return %0 : tensor> - } - - // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>> - // CHECK-LABEL: "type_token_callee" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_token_callee(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.token_v1) -> () -+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> () - return %arg0 : !stablehlo.token - } - - // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>> - // CHECK-LABEL: "type_token_caller" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_token_caller(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.call_v1"(%arg0) <{callee = #vhlo.string_v1<"type_token_callee">} -+ // CHECK: "vhlo.call_v1"(%[[ARG0]]) <{callee = #vhlo.string_v1<"type_token_callee">} - // CHECK-SAME: (!vhlo.token_v1) -> !vhlo.token_v1 - %0 = func.call @type_token_callee(%arg0) : (!stablehlo.token) -> !stablehlo.token - return %0 : !stablehlo.token - } - - // CHECK-LABEL: "type_tuple" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_tuple(%arg0: tuple>) -> tuple { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo" -diff --ruN a/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_17_0.mlir b/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_17_0.mlir ---- stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_17_0.mlir -+++ stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_17_0.mlir -@@ -13,6 +13,7 @@ - // ============ ATTRIBUTES ============ - - // CHECK-LABEL: "attr_comparison_direction_eq" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_eq(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -22,6 +23,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_ne" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_ne(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -31,6 +33,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_ge" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_ge(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -40,6 +43,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_gt" -+// CHECK-NEXT: 
- func.func @attr_comparison_direction_gt(%arg0: tensor, %arg1: tensor) -> tensor {
-   %0 = "stablehlo.compare"(%arg0, %arg1) {
-     // CHECK: comparison_direction = #vhlo
-@@ -49,6 +53,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_le"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_le(%arg0: tensor, %arg1: tensor) -> tensor {
-   %0 = "stablehlo.compare"(%arg0, %arg1) {
-     // CHECK: comparison_direction = #vhlo
-@@ -58,6 +63,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_lt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_lt(%arg0: tensor, %arg1: tensor) -> tensor {
-   %0 = "stablehlo.compare"(%arg0, %arg1) {
-     // CHECK: comparison_direction = #vhlo
-@@ -67,6 +73,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_notype"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_notype(%arg0: tensor, %arg1: tensor) -> tensor {
-   %0 = "stablehlo.compare"(%arg0, %arg1) {
-     comparison_direction = #stablehlo
-@@ -76,6 +83,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_float"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_float(%arg0: tensor, %arg1: tensor) -> tensor {
-   %0 = "stablehlo.compare"(%arg0, %arg1) {
-     comparison_direction = #stablehlo,
-@@ -86,6 +94,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_totalorder"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_totalorder(%arg0: tensor, %arg1: tensor) -> tensor {
-   %0 = "stablehlo.compare"(%arg0, %arg1) {
-     comparison_direction = #stablehlo,
-@@ -96,6 +105,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_signed"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_signed(%arg0: tensor, %arg1: tensor) -> tensor {
-   %0 = "stablehlo.compare"(%arg0, %arg1) {
-     comparison_direction = #stablehlo,
-@@ -106,6 +116,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_unsigned"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_unsigned(%arg0: tensor, %arg1: tensor) -> tensor {
-   %0 = "stablehlo.compare"(%arg0, %arg1) {
-     comparison_direction = #stablehlo,
-@@ -118,6 +129,7 @@
- // ConvDimensionNumbers aka #stablehlo.conv is covered below.
- - // CHECK-LABEL: "attr_custom_call_api_version_unspecified" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_unspecified(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -128,6 +140,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_original" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_original(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -138,6 +151,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_status_returning" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_status_returning(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -148,6 +162,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_status_returning_unified" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_status_returning_unified(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -166,6 +181,7 @@ - // DotDimensionNumbers aka #stablehlo.dot is covered below. - - // CHECK-LABEL: "attr_fft_type_fft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -176,6 +192,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_ifft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_ifft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -186,6 +203,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_rfft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_rfft(%arg0: tensor<16xf32>) -> tensor<9xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -196,6 +214,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_irfft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_irfft(%arg0: tensor<9xcomplex>) -> tensor<16xf32> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -208,6 +227,7 @@ - // GatherDimensionNumbers aka #stablehlo.gather is covered below. 
- - // CHECK-LABEL: "attr_precision_config_default" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_default(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -216,6 +236,7 @@ - } - - // CHECK-LABEL: "attr_precision_config_high" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_high(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -225,6 +246,7 @@ - } - - // CHECK-LABEL: "attr_precision_config_highest" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_highest(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -234,6 +256,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_default" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_default(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -243,6 +266,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_three_fry" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_three_fry(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -252,6 +276,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_philox" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_philox(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -261,6 +286,7 @@ - } - - // CHECK-LABEL: "attr_rng_distribution_uniform" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @attr_rng_distribution_uniform(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { - %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) { - // CHECK: rng_distribution = #vhlo -@@ -270,6 +296,7 @@ - } - - // CHECK-LABEL: "attr_rng_distribution_normal" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @attr_rng_distribution_normal(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { - %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) { - // CHECK: rng_distribution = #vhlo -@@ -281,6 +308,7 @@ - // ScatterDimensionNumbers aka #stablehlo.scatter is covered below. 
- - // CHECK-LABEL: "attr_transpose_no_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_no_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -293,6 +321,7 @@ - } - - // CHECK-LABEL: "attr_transpose_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -305,6 +334,7 @@ - } - - // CHECK-LABEL: "attr_transpose_adjoint" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_adjoint(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -319,10 +349,9 @@ - // TypeExtensionsAttr aka #stablehlo.type_extensions is covered below. - - // CHECK-LABEL: "attr_type_extensions_bounds" --func.func @attr_type_extensions_bounds( -- %arg0: tensor>) -- -> tensor> { -- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1>) -> () -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) -+func.func @attr_type_extensions_bounds(%arg0: tensor>) -> tensor> { -+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> () - func.return %arg0 : tensor> - } - -@@ -330,8 +359,9 @@ - // ============ DEFAULTS ============ - - // CHECK-LABEL: "default_all_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.all_gather_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{ - // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64> - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -345,8 +375,9 @@ - } - - // CHECK-LABEL: "default_all_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_reduce(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.all_reduce_v1"(%arg0) -+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]]) - // CHECK-SAME: <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -368,8 +399,9 @@ - } - - // CHECK-LABEL: "default_all_to_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> { -- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>, -@@ -386,8 +418,9 @@ - } - - // CHECK-LABEL: "default_cholesky" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> { -- // CHECK: "vhlo.cholesky_v1"(%arg0) <{ -+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{ - // CHECK-SAME: lower = #vhlo.bool_v1 - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1> - %0 = "stablehlo.cholesky"(%arg0) : (tensor<1x16x16xf32>) -> tensor<1x16x16xf32> -@@ -395,8 +428,9 @@ - } - - // CHECK-LABEL: "default_collective_permute" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: 
"vhlo.collective_permute_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -407,8 +441,9 @@ - } - - // CHECK-LABEL: "default_collective_broadcast" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_collective_broadcast(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: "vhlo.collective_broadcast_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_broadcast_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -419,8 +454,9 @@ - } - - // CHECK-LABEL: "default_compare" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_compare(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: compare_type = #vhlo, - // CHECK-SAME: comparison_direction = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -431,8 +467,9 @@ - } - - // CHECK-LABEL: "default_convolution" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x6x6x16xf32> { -- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -460,8 +497,9 @@ - } - - // CHECK-LABEL: "default_custom_call" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_custom_call(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.custom_call_v1"(%arg0) <{ -+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{ - // CHECK-SAME: api_version = #vhlo, - // CHECK-SAME: backend_config = #vhlo.string_v1<"">, - // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">, -@@ -478,8 +516,9 @@ - } - - // CHECK-LABEL: "default_dot_general" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> { -- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, -@@ -498,8 +537,9 @@ - } - - // CHECK-LABEL: "default_dot" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.dot"(%arg0, %arg1) : (tensor<8x16xf32>, tensor<16x8xf32>) -> tensor<8x8xf32> -@@ -507,8 +547,9 @@ - } - - // CHECK-LABEL: 
"default_dynamic_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>>, - // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>> -@@ -520,8 +561,9 @@ - } - - // CHECK-LABEL: "default_dynamic_conv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> { -- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -549,8 +591,9 @@ - } - - // CHECK-LABEL: "default_dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> { -- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -576,15 +619,16 @@ - // CHECK-SAME: sym_name = #vhlo.string_v1<"default_func">, - // CHECK-SAME: sym_visibility = #vhlo.string_v1<""> - // CHECK-SAME: }> ({ -- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1): -- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1): -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : () -> () - func.return %arg0 : tensor - } - - // CHECK-LABEL: "dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> { -- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -605,8 +649,9 @@ - } - - // CHECK-LABEL: "default_infeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.infeed_v1"(%arg0) <{ -+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{ - // CHECK-SAME: infeed_config = #vhlo.string_v1<"">, - // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[]> - // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) -@@ -615,8 +660,9 @@ - } - - // CHECK-LABEL: "default_outfeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], 
%[[ARG1]]) <{ - // CHECK-SAME: outfeed_config = #vhlo.string_v1<""> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.outfeed"(%arg0, %arg1) : (tensor, !stablehlo.token) -> !stablehlo.token -@@ -624,8 +670,9 @@ - } - - // CHECK-LABEL: "default_recv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.recv_v1"(%arg0) <{ -+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -637,8 +684,9 @@ - } - - // CHECK-LABEL: "default_send" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -650,8 +698,9 @@ - } - - // CHECK-LABEL: "default_reduce_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64> -@@ -673,8 +722,9 @@ - } - - // CHECK-LABEL: "default_reduce_window" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x16x30x7xf32> { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, -@@ -696,8 +746,9 @@ - } - - // CHECK-LABEL: "default_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> { -- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, - // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>, -@@ -725,8 +776,9 @@ - } - - // CHECK-LABEL: "default_select_and_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<10x23x23x64xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> { -- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>> -@@ -754,8 +806,9 @@ - } - - // CHECK-LABEL: "default_sort" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func 
@default_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.sort_v1"(%arg0) <{ -+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<-1 : i64> - // CHECK-SAME: is_stable = #vhlo.bool_v1 - // CHECK-SAME: }> ({ -@@ -774,29 +827,33 @@ - // ============ OPS ============ - - // CHECK-LABEL: "op_abs" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_abs(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_add" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_add(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_after_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_after_all(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.after_all_v1"(%arg0) : (!vhlo.token_v1) -> !vhlo.token_v1 -+ // CHECK: "vhlo.after_all_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.after_all"(%arg0) : (!stablehlo.token) -> !stablehlo.token - func.return %0 : !stablehlo.token - } - - // CHECK-LABEL: "op_all_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.all_gather_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{ - // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64> - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -812,8 +869,9 @@ - } - - // CHECK-LABEL: "op_all_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_reduce(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.all_reduce_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: use_global_device_ids = #vhlo.bool_v1 -@@ -836,7 +894,7 @@ - - // CHECK-LABEL: "op_all_reduce_with_promotable_types" - func.func @op_all_reduce_with_promotable_types(%operand: tensor) -> tensor { -- // CHECK: "vhlo.all_reduce_v1"(%arg0) -+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0:.*]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -854,8 +912,9 @@ - } - - // CHECK-LABEL: "op_all_to_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> { -- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>, -@@ -873,22 +932,25 @@ - } - - // CHECK-LABEL: "op_and" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_and(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: 
"vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_atan2" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_atan2(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.atan2_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.atan2_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.atan2"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_batch_norm_grad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_batch_norm_grad(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16x16x16x16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { -- // CHECK: "vhlo.batch_norm_grad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ -+ // CHECK: "vhlo.batch_norm_grad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -@@ -900,8 +962,9 @@ - } - - // CHECK-LABEL: "op_batch_norm_inference" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_batch_norm_inference(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16xf32>) -> tensor<16x16x16x16xf32> { -- // CHECK: "vhlo.batch_norm_inference_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ -+ // CHECK: "vhlo.batch_norm_inference_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1> -@@ -913,8 +976,9 @@ - } - - // CHECK-LABEL: "op_batch_norm_training" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_batch_norm_training(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { -- // CHECK: "vhlo.batch_norm_training_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.batch_norm_training_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, 
!vhlo.tensor_v1<16x!vhlo.f32_v1>) -@@ -926,15 +990,17 @@ - } - - // CHECK-LABEL: "op_bitcast_convert" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_bitcast_convert(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.bitcast_convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.bitcast_convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.bitcast_convert"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_broadcast_in_dim(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.broadcast_in_dim_v1"(%arg0) <{ -+ // CHECK: "vhlo.broadcast_in_dim_v1"(%[[ARG0]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1> - %0 = "stablehlo.broadcast_in_dim"(%arg0) { -@@ -944,8 +1010,9 @@ - } - - // CHECK-LABEL: "op_broadcast" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_broadcast(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.broadcast_v1"(%arg0) <{ -+ // CHECK: "vhlo.broadcast_v1"(%[[ARG0]]) <{ - // CHECK-SAME: broadcast_sizes = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1> - %0 = "stablehlo.broadcast"(%arg0) { -@@ -955,9 +1022,10 @@ - } - - // CHECK-LABEL: "op_case" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_case(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.case_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.case"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -966,22 +1034,25 @@ - } - - // CHECK-LABEL: "op_cbrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cbrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cbrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.cbrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cbrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_ceil" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_ceil(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.ceil_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.ceil_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.ceil"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_cholesky" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> { -- // CHECK: "vhlo.cholesky_v1"(%arg0) <{ -+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{ - // CHECK-SAME: lower = #vhlo.bool_v1 - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1> - %0 = "stablehlo.cholesky"(%arg0) { -@@ -991,22 +1062,25 @@ - } - - // CHECK-LABEL: "op_clamp" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_clamp(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.clamp_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.clamp_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, 
!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.clamp"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_count_leading_zeros" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_count_leading_zeros(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.count_leading_zeros_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.count_leading_zeros_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.count_leading_zeros"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_collective_permute" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: "vhlo.collective_permute_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -1018,8 +1092,9 @@ - } - - // CHECK-LABEL: "op_compare" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_compare(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: compare_type = #vhlo, - // CHECK-SAME: comparison_direction = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -1031,15 +1106,17 @@ - } - - // CHECK-LABEL: "op_complex" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_complex(%arg0: tensor, %arg1: tensor) -> tensor> { -- // CHECK: "vhlo.complex_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.complex_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1> - %0 = "stablehlo.complex"(%arg0, %arg1) : (tensor, tensor) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "op_concatenate" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_concatenate(%arg0: tensor<8xf32>, %arg1: tensor<8xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.concatenate_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.concatenate_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x!vhlo.f32_v1>, !vhlo.tensor_v1<8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.concatenate"(%arg0, %arg1) { -@@ -1049,6 +1126,7 @@ - } - - // CHECK-LABEL: "op_constant" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_constant(%arg0: tensor) -> tensor { - // CHECK: "vhlo.constant_v1"() <{ - // CHECK-SAME: value = #vhlo.tensor_v1 : tensor> -@@ -1060,15 +1138,17 @@ - } - - // CHECK-LABEL: "op_convert" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_convert(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.convert"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_convolution" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x7x7x16xf32> { -- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: 
batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -1102,8 +1182,9 @@ - } - - // CHECK-LABEL: "op_cosine" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cosine(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cosine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.cosine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cosine"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } -@@ -1116,8 +1197,9 @@ - } - - // CHECK-LABEL: "op_cross_replica_sum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cross_replica_sum(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cross-replica-sum_v1"(%arg0) <{ -+ // CHECK: "vhlo.cross-replica-sum_v1"(%[[ARG0]]) <{ - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cross-replica-sum"(%arg0) { -@@ -1127,8 +1209,9 @@ - } - - // CHECK-LABEL: "op_custom_call" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_custom_call(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.custom_call_v1"(%arg0) <{ -+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{ - // CHECK-SAME: api_version = #vhlo, - // CHECK-SAME: backend_config = #vhlo.string_v1<"\08\03\1A\02">, - // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">, -@@ -1159,15 +1242,17 @@ - } - - // CHECK-LABEL: "op_divide" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_divide(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.divide_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.divide_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.divide"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dot_general" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> { -- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, -@@ -1187,8 +1272,9 @@ - } - - // CHECK-LABEL: "op_dot" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.dot"(%arg0, %arg1) { -@@ -1198,8 +1284,9 @@ - } - - // CHECK-LABEL: "op_dynamic_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // 
CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1213,8 +1300,9 @@ - } - - // CHECK-LABEL: "op_dynamic_conv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> { -- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -1248,8 +1336,9 @@ - } - - // CHECK-LABEL: "op_dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> { -- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -1269,8 +1358,9 @@ - } - - // CHECK-LABEL: "op_dynamic_iota" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_dynamic_iota(%arg0: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_iota_v1"(%arg0) <{ -+ // CHECK: "vhlo.dynamic_iota_v1"(%[[ARG0]]) <{ - // CHECK-SAME: iota_dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_iota"(%arg0) { -@@ -1280,22 +1370,25 @@ - } - - // CHECK-LABEL: "op_dynamic_pad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_dynamic_pad(%arg0: tensor, %arg1: tensor, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>, %arg4: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_pad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.dynamic_pad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_pad"(%arg0, %arg1, %arg2, %arg3, %arg4) : (tensor, tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dynamic_reshape" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_reshape(%arg0: tensor<16xf32>, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_reshape_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.dynamic_reshape_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_reshape"(%arg0, %arg1) : (tensor<16xf32>, tensor<2xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dynamic_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_slice(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor<4xf32> { -- // 
CHECK: "vhlo.dynamic_slice_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_slice_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: slice_sizes = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<4x!vhlo.f32_v1> - %0 = "stablehlo.dynamic_slice"(%arg0, %arg1) { -@@ -1305,15 +1398,17 @@ - } - - // CHECK-LABEL: "op_dynamic_update_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_update_slice(%arg0: tensor<16xf32>, %arg1: tensor<4xf32>, %arg2: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.dynamic_update_slice_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> -+ // CHECK: "vhlo.dynamic_update_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.dynamic_update_slice"(%arg0, %arg1, %arg2) : (tensor<16xf32>, tensor<4xf32>, tensor) -> tensor<16xf32> - func.return %0 : tensor<16xf32> - } - - // CHECK-LABEL: "op_einsum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_einsum(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.einsum_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.einsum_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab,bc->ac"> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.einsum"(%arg0, %arg1) { -@@ -1323,22 +1418,25 @@ - } - - // CHECK-LABEL: "op_exponential_minus_one" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_exponential_minus_one(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.exponential_minus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.exponential_minus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.exponential_minus_one"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_exponential" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_exponential(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.exponential_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.exponential_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.exponential"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_fft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { -- // CHECK: "vhlo.fft_v1"(%arg0) <{ -+ // CHECK: "vhlo.fft_v1"(%[[ARG0]]) <{ - // CHECK-SAME: fft_length = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: fft_type = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.complex_v1>) -> !vhlo.tensor_v1<16x!vhlo.complex_v1> -@@ -1350,8 +1448,9 @@ - } - - // CHECK-LABEL: "op_floor" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_floor(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.floor_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.floor_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.floor"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } -@@ -1364,16 +1463,17 @@ - // CHECK-SAME: sym_name = #vhlo.string_v1<"op_func">, - // CHECK-SAME: sym_visibility = #vhlo.string_v1<"private"> - // CHECK-SAME: }> ({ -- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: 
!vhlo.tensor_v1): -- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1): -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : () -> () - - func.return %arg0 : tensor - } - - // CHECK-LABEL: "op_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> { -- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -1395,8 +1495,9 @@ - } - - // CHECK-LABEL: "op_get_dimension_size" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_get_dimension_size(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.get_dimension_size_v1"(%arg0) <{ -+ // CHECK: "vhlo.get_dimension_size_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.get_dimension_size"(%arg0) { -@@ -1406,8 +1507,9 @@ - } - - // CHECK-LABEL: "op_get_tuple_element" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_get_tuple_element(%arg0: tuple, tensor>) -> tensor { -- // CHECK: "vhlo.get_tuple_element_v1"(%arg0) <{ -+ // CHECK: "vhlo.get_tuple_element_v1"(%[[ARG0]]) <{ - // CHECK-SAME: index = #vhlo.integer_v1<0 : i32> - // CHECK-SAME: }> : (!vhlo.tuple_v1, !vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.get_tuple_element"(%arg0) { -@@ -1417,11 +1519,12 @@ - } - - // CHECK-LABEL: "op_if" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_if(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.if_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.if_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }, { -- // CHECK-NEXT: "vhlo.return_v1"(%arg2) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG2]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.if"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -1432,15 +1535,17 @@ - } - - // CHECK-LABEL: "op_imag" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_imag(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.imag_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.imag_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.imag"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_infeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.infeed_v1"(%arg0) <{ -+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{ - // CHECK-SAME: infeed_config = #vhlo.string_v1<"foo">, - // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[#vhlo.array_v1<[]>]> - // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) -@@ -1463,36 +1568,41 @@ - } - - // CHECK-LABEL: "op_is_finite" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_is_finite(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.is_finite_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.is_finite_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 
= "stablehlo.is_finite"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_log" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_log(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.log_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.log_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.log"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_log_plus_one" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_log_plus_one(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.log_plus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.log_plus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.log_plus_one"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_logistic" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_logistic(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.logistic_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.logistic_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.logistic"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_map" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_map(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.map_v1"(%arg0) <{ -+ // CHECK: "vhlo.map_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> ({ - // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1): -@@ -1510,57 +1620,65 @@ - } - - // CHECK-LABEL: "op_maximum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_maximum(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.maximum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.maximum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.maximum"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_minimum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_minimum(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.minimum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.minimum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.minimum"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_multiply" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_multiply(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.multiply_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.multiply_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.multiply"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_negate" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_negate(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.negate_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.negate_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.negate"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_not" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_not(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.not_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ 
// CHECK: "vhlo.not_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.not"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_optimization_barrier" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_optimization_barrier(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.optimization_barrier_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.optimization_barrier_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.optimization_barrier"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_or" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_or(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.or_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.or_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.or"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_outfeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: outfeed_config = #vhlo.string_v1<"foo"> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.outfeed"(%arg0, %arg1) { -@@ -1570,8 +1688,9 @@ - } - - // CHECK-LABEL: "op_pad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_pad(%arg0: tensor<8xf32>, %arg1: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.pad_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.pad_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: edge_padding_high = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: edge_padding_low = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: interior_padding = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1585,36 +1704,41 @@ - } - - // CHECK-LABEL: "op_popcnt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_popcnt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.popcnt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.popcnt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.popcnt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_power" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_power(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.power_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.power_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.power"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_real_dynamic_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}) - func.func @op_real_dynamic_slice(%arg0: tensor, %arg1: tensor<1xindex>, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.real_dynamic_slice_v1"(%arg0, %arg1, %arg2, %arg3) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.real_dynamic_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) 
-> !vhlo.tensor_v1 - %0 = "stablehlo.real_dynamic_slice"(%arg0, %arg1, %arg2, %arg3) : (tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_real" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_real(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.real_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.real_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.real"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_recv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.recv_v1"(%arg0) <{ -+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<3 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -1627,8 +1751,9 @@ - } - - // CHECK-LABEL: "op_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_reduce(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.reduce_v1"(%arg0, %arg1) -+ // CHECK: "vhlo.reduce_v1"(%[[ARG0]], %[[ARG1]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -1643,8 +1768,9 @@ - } - - // CHECK-LABEL: "op_reduce_precision" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reduce_precision(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.reduce_precision_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_precision_v1"(%[[ARG0]]) <{ - // CHECK-SAME: exponent_bits = #vhlo.integer_v1<8 : i32> - // CHECK-SAME: mantissa_bits = #vhlo.integer_v1<10 : i32> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -1658,7 +1784,7 @@ - // CHECK_lABEL: "op_reduce_with_promotable_types" - func.func @op_reduce_with_promotable_types(%arg0: tensor<4x4xf32>, %arg1 : tensor) - -> (tensor<4xf64>) { -- // CHECK: "vhlo.reduce_v1"(%arg0, %arg1) -+ // CHECK: "vhlo.reduce_v1"(%[[ARG0:.*]], %[[ARG1:.*]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1<4x4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<4x!vhlo.f64_v1> -@@ -1673,8 +1799,9 @@ - } - - // CHECK-LABEL: "op_reduce_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64> -@@ -1699,7 +1826,7 @@ - - // CHECK_lABEL: "op_reduce_scatter_with_promotable_types" - func.func @op_reduce_scatter_with_promotable_types(%data: tensor<4x16xf32>) -> tensor<4x4xf64> { -- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) -+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0:.*]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1<4x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f64_v1> -@@ -1716,8 +1843,9 @@ - - - // CHECK-LABEL: 
"op_reduce_window" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x9x16x7xf32> { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, -@@ -1742,11 +1870,11 @@ - func.return %0 : tensor<2x9x16x7xf32> - } - --// CHECK_lABEL: "op_reduce_window_with_promotable_types" -+// CHECK-LABEL: "op_reduce_window_with_promotable_types" - func.func @op_reduce_window_with_promotable_types(%arg0: tensor<4x2xf32>, - %arg1: tensor<4x2xf32>, %init0: tensor, %init1: tensor) -> - (tensor<2x2xf64>, tensor<2x2xf32>) { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1, %arg2, %arg3) -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0:.*]], %[[ARG1:.*]], %[[ARG2:.*]], %[[ARG3:.*]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1, %[[ARG3:arg.*]]: !vhlo.tensor_v1, %[[ARG4:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]], %[[VAL2:.*]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1<4x2x!vhlo.f32_v1>, !vhlo.tensor_v1<4x2x!vhlo.f32_v1>, !vhlo.tensor_v1, !vhlo.tensor_v1) -> (!vhlo.tensor_v1<2x2x!vhlo.f64_v1>, !vhlo.tensor_v1<2x2x!vhlo.f32_v1>) -@@ -1766,8 +1894,9 @@ - } - - // CHECK-LABEL: "op_remainder" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_remainder(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.remainder_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.remainder_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.remainder"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } -@@ -1787,16 +1916,18 @@ - } - - // CHECK-LABEL: "op_reshape" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reshape(%arg0: tensor<16xf32>) -> tensor<4x4xf32> { -- // CHECK: "vhlo.reshape_v1"(%arg0) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1> -+ // CHECK: "vhlo.reshape_v1"(%[[ARG0]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1> - %0 = "stablehlo.reshape"(%arg0) : (tensor<16xf32>) -> tensor<4x4xf32> - func.return %0 : tensor<4x4xf32> - } - - // CHECK-LABEL: "op_return" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_return(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.case_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.case"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -1805,8 +1936,9 @@ - } - - // CHECK-LABEL: "op_reverse" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reverse(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reverse_v1"(%arg0) <{ -+ // CHECK: "vhlo.reverse_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.reverse"(%arg0) { -@@ -1816,8 +1948,9 @@ - } - - // CHECK-LABEL: "op_rng_bit_generator" -+// CHECK-NEXT: 
(%[[ARG0:.*]]: {{.*}}) - func.func @op_rng_bit_generator(%arg0: tensor) -> (tensor, tensor) { -- // CHECK: "vhlo.rng_bit_generator_v1"(%arg0) <{ -+ // CHECK: "vhlo.rng_bit_generator_v1"(%[[ARG0]]) <{ - // CHECK-SAME: rng_algorithm = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> (!vhlo.tensor_v1, !vhlo.tensor_v1) - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { -@@ -1827,8 +1960,9 @@ - } - - // CHECK-LABEL: "op_rng" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_rng(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { -- // CHECK: "vhlo.rng_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.rng_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: rng_distribution = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<0x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) { -@@ -1838,29 +1972,33 @@ - } - - // CHECK-LABEL: "op_round_nearest_afz" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_round_nearest_afz(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.round_nearest_afz_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.round_nearest_afz_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.round_nearest_afz"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_round_nearest_even" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_round_nearest_even(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.round_nearest_even_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.round_nearest_even_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.round_nearest_even"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_rsqrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_rsqrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.rsqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.rsqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.rsqrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> { -- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, - // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>, -@@ -1893,7 +2031,7 @@ - func.func @op_scatter_with_promotable_types(%input_tensor: tensor<200x100x300xf32>, - %scatter_indices: tensor<10x2xi32>, %updates: tensor<10x300xf32>) -> - tensor<200x100x300xf64> { -- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) -+ // CHECK: "vhlo.scatter_v1"(%[[ARG0:.*]], %[[ARG1:.*]], %[[ARG2:.*]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1<200x100x300x!vhlo.f32_v1>, !vhlo.tensor_v1<10x2x!vhlo.i32_v1>, !vhlo.tensor_v1<10x300x!vhlo.f32_v1>) -> !vhlo.tensor_v1<200x100x300x!vhlo.f64_v1> -@@ -1916,8 +2054,9 @@ - } - - // CHECK-LABEL: "op_select_and_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func 
@op_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<12x13x13x66xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> { -- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>> -@@ -1947,8 +2086,9 @@ - } - - // CHECK-LABEL: "op_select_and_scatter_with_promotable_types" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_select_and_scatter_with_promotable_types(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<12x13x13x66xf32>, %arg2: tensor) -> tensor<10x24x24x64xf64> { -- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) -+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: %[[VAL:.*]] = "vhlo.add_v1"(%[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - // CHECK: "vhlo.return_v1"(%[[VAL]]) : (!vhlo.tensor_v1) -> () -@@ -1970,15 +2110,17 @@ - } - - // CHECK-LABEL: "op_select" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_select(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.select_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.select_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.select"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_send" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -1991,8 +2133,9 @@ - } - - // CHECK-LABEL: "op_set_dimension_size" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_set_dimension_size(%arg0: tensor, %arg1: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.set_dimension_size_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.set_dimension_size_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.set_dimension_size"(%arg0, %arg1) { -@@ -2002,43 +2145,49 @@ - } - - // CHECK-LABEL: "op_shift_left" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_left(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.shift_left_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_left_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_left"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_shift_right_arithmetic" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_right_arithmetic(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: 
"vhlo.shift_right_arithmetic_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_right_arithmetic_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_right_arithmetic"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_shift_right_logical" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_right_logical(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.shift_right_logical_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_right_logical_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_right_logical"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_sign" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sign(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.sign_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.sign_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.sign"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_sine" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sine(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.sine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.sine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.sine"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_slice(%arg0: tensor<16xf32>) -> tensor<4xf32> { -- // CHECK: "vhlo.slice_v1"(%arg0) <{ -+ // CHECK: "vhlo.slice_v1"(%[[ARG0]]) <{ - // CHECK-SAME: limit_indices = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: start_indices = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: strides = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -2052,8 +2201,9 @@ - } - - // CHECK-LABEL: "op_sort" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.sort_v1"(%arg0) <{ -+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: is_stable = #vhlo.bool_v1 - // CHECK-SAME: }> ({ -@@ -2073,29 +2223,33 @@ - } - - // CHECK-LABEL: "op_sqrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sqrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.sqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.sqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.sqrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_subtract" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_subtract(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.subtract_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.subtract_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.subtract"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_tanh" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_tanh(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.tanh_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.tanh_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.tanh"(%arg0) : (tensor) -> tensor - func.return %0 : tensor 
- } - - // CHECK-LABEL: "op_torch_index_select" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_torch_index_select(%arg0: tensor<5x1x5xf32>, %arg1: tensor<2xi32>) -> tensor<2x1x5xf32> { -- // CHECK: "vhlo.torch_index_select_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.torch_index_select_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_dims = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: dim = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<5x1x5x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.i32_v1>) -> !vhlo.tensor_v1<2x1x5x!vhlo.f32_v1> -@@ -2107,8 +2261,9 @@ - } - - // CHECK-LABEL: "op_trace" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_trace(%arg0: tensor) { -- // CHECK: "vhlo.trace_v1"(%arg0) <{ -+ // CHECK: "vhlo.trace_v1"(%[[ARG0]]) <{ - // CHECK-SAME: tag = #vhlo.string_v1<"foo"> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> () - "stablehlo.trace"(%arg0) { -@@ -2118,8 +2273,9 @@ - } - - // CHECK-LABEL: "op_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_transpose(%arg0: tensor<16x8xf32>) -> tensor<8x16xf32> { -- // CHECK: "vhlo.transpose_v1"(%arg0) <{ -+ // CHECK: "vhlo.transpose_v1"(%[[ARG0]]) <{ - // CHECK-SAME: permutation = #vhlo.tensor_v1 : tensor<2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x16x!vhlo.f32_v1> - %0 = "stablehlo.transpose"(%arg0) { -@@ -2129,8 +2285,9 @@ - } - - // CHECK-LABEL: "op_triangular_solve" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_triangular_solve(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.triangular_solve_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.triangular_solve_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: left_side = #vhlo.bool_v1, - // CHECK-SAME: lower = #vhlo.bool_v1, - // CHECK-SAME: transpose_a = #vhlo, -@@ -2146,15 +2303,17 @@ - } - - // CHECK-LABEL: "op_tuple" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_tuple(%arg0: tensor) -> tuple> { -- // CHECK: "vhlo.tuple_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1> -+ // CHECK: "vhlo.tuple_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1> - %0 = "stablehlo.tuple"(%arg0) : (tensor) -> tuple> - func.return %0 : tuple> - } - - // CHECK-LABEL: "op_unary_einsum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_unary_einsum(%arg0: tensor<8x16xf32>) -> tensor<8xf32> { -- // CHECK: "vhlo.unary_einsum_v1"(%arg0) <{ -+ // CHECK: "vhlo.unary_einsum_v1"(%[[ARG0]]) <{ - // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab->a"> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x!vhlo.f32_v1> - %0 = "stablehlo.unary_einsum"(%arg0) { -@@ -2164,22 +2323,25 @@ - } - - // CHECK-LABEL: "op_uniform_dequantize" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_uniform_dequantize(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.uniform_dequantize_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.uniform_dequantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.uniform_dequantize"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_uniform_quantize" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_uniform_quantize(%arg0: tensor) -> tensor> { -- // CHECK: "vhlo.uniform_quantize_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.uniform_quantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1> - %0 = "stablehlo.uniform_quantize"(%arg0) : (tensor) -> tensor> - func.return %0 : 
tensor>
- }
-
- // CHECK-LABEL: "op_while"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_while(%arg0: tensor<i1>) -> tensor<i1> {
-- // CHECK: "vhlo.while_v1"(%arg0) ({
-+ // CHECK: "vhlo.while_v1"(%[[ARG0]]) ({
- // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1<!vhlo.i1_v1>):
- // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.i1_v1>) -> ()
- // CHECK-NEXT: }, {
-@@ -2197,8 +2359,9 @@
- }
-
- // CHECK-LABEL: "op_xor"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_xor(%arg0: tensor<i1>, %arg1: tensor<i1>) -> tensor<i1> {
-- // CHECK: "vhlo.xor_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.i1_v1>, !vhlo.tensor_v1<!vhlo.i1_v1>) -> !vhlo.tensor_v1<!vhlo.i1_v1>
-+ // CHECK: "vhlo.xor_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.i1_v1>, !vhlo.tensor_v1<!vhlo.i1_v1>) -> !vhlo.tensor_v1<!vhlo.i1_v1>
- %0 = "stablehlo.xor"(%arg0, %arg1) : (tensor<i1>, tensor<i1>) -> tensor<i1>
- func.return %0 : tensor<i1>
- }
-@@ -2206,190 +2369,217 @@
- // ============ TYPES ============
-
- // CHECK-LABEL: "type_i1"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i1(%arg0: tensor<i1>, %arg1: tensor<i1>) -> tensor<i1> {
-- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.i1_v1>, !vhlo.tensor_v1<!vhlo.i1_v1>) -> !vhlo.tensor_v1<!vhlo.i1_v1>
-+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.i1_v1>, !vhlo.tensor_v1<!vhlo.i1_v1>) -> !vhlo.tensor_v1<!vhlo.i1_v1>
- %0 = "stablehlo.and"(%arg0, %arg1) : (tensor<i1>, tensor<i1>) -> tensor<i1>
- func.return %0 : tensor<i1>
- }
-
- // CHECK-LABEL: "type_i4"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i4(%arg0: tensor<i4>, %arg1: tensor<i4>) -> tensor<i4> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.i4_v1>, !vhlo.tensor_v1<!vhlo.i4_v1>) -> !vhlo.tensor_v1<!vhlo.i4_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.i4_v1>, !vhlo.tensor_v1<!vhlo.i4_v1>) -> !vhlo.tensor_v1<!vhlo.i4_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<i4>, tensor<i4>) -> tensor<i4>
- func.return %0 : tensor<i4>
- }
-
- // CHECK-LABEL: "type_i8"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i8(%arg0: tensor<i8>, %arg1: tensor<i8>) -> tensor<i8> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.i8_v1>, !vhlo.tensor_v1<!vhlo.i8_v1>) -> !vhlo.tensor_v1<!vhlo.i8_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.i8_v1>, !vhlo.tensor_v1<!vhlo.i8_v1>) -> !vhlo.tensor_v1<!vhlo.i8_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<i8>, tensor<i8>) -> tensor<i8>
- func.return %0 : tensor<i8>
- }
-
- // CHECK-LABEL: "type_i16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i16(%arg0: tensor<i16>, %arg1: tensor<i16>) -> tensor<i16> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.i16_v1>, !vhlo.tensor_v1<!vhlo.i16_v1>) -> !vhlo.tensor_v1<!vhlo.i16_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.i16_v1>, !vhlo.tensor_v1<!vhlo.i16_v1>) -> !vhlo.tensor_v1<!vhlo.i16_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<i16>, tensor<i16>) -> tensor<i16>
- func.return %0 : tensor<i16>
- }
-
- // CHECK-LABEL: "type_i32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i32(%arg0: tensor<i32>, %arg1: tensor<i32>) -> tensor<i32> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.i32_v1>, !vhlo.tensor_v1<!vhlo.i32_v1>) -> !vhlo.tensor_v1<!vhlo.i32_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.i32_v1>, !vhlo.tensor_v1<!vhlo.i32_v1>) -> !vhlo.tensor_v1<!vhlo.i32_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> tensor<i32>
- func.return %0 : tensor<i32>
- }
-
- // CHECK-LABEL: "type_i64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i64(%arg0: tensor<i64>, %arg1: tensor<i64>) -> tensor<i64> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.i64_v1>, !vhlo.tensor_v1<!vhlo.i64_v1>) -> !vhlo.tensor_v1<!vhlo.i64_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.i64_v1>, !vhlo.tensor_v1<!vhlo.i64_v1>) -> !vhlo.tensor_v1<!vhlo.i64_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<i64>, tensor<i64>) -> tensor<i64>
- func.return %0 : tensor<i64>
- }
-
- // CHECK-LABEL: "type_ui4"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui4(%arg0: tensor<ui4>, %arg1: tensor<ui4>) -> tensor<ui4> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.ui4_v1>, !vhlo.tensor_v1<!vhlo.ui4_v1>) -> !vhlo.tensor_v1<!vhlo.ui4_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.ui4_v1>, !vhlo.tensor_v1<!vhlo.ui4_v1>) -> !vhlo.tensor_v1<!vhlo.ui4_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<ui4>, tensor<ui4>) -> tensor<ui4>
- func.return %0 : tensor<ui4>
- }
-
- // CHECK-LABEL: "type_ui8"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui8(%arg0: tensor<ui8>, %arg1: tensor<ui8>) -> tensor<ui8> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.ui8_v1>, !vhlo.tensor_v1<!vhlo.ui8_v1>) -> !vhlo.tensor_v1<!vhlo.ui8_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.ui8_v1>, !vhlo.tensor_v1<!vhlo.ui8_v1>) -> !vhlo.tensor_v1<!vhlo.ui8_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<ui8>, tensor<ui8>) -> tensor<ui8>
- func.return %0 : tensor<ui8>
- }
-
- // CHECK-LABEL: "type_ui16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui16(%arg0: tensor<ui16>, %arg1: tensor<ui16>) -> tensor<ui16> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.ui16_v1>, !vhlo.tensor_v1<!vhlo.ui16_v1>) -> !vhlo.tensor_v1<!vhlo.ui16_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.ui16_v1>, !vhlo.tensor_v1<!vhlo.ui16_v1>) -> !vhlo.tensor_v1<!vhlo.ui16_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<ui16>, tensor<ui16>) -> tensor<ui16>
- func.return %0 : tensor<ui16>
- }
-
- // CHECK-LABEL: "type_ui32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui32(%arg0: tensor<ui32>, %arg1: tensor<ui32>) -> tensor<ui32> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.ui32_v1>, !vhlo.tensor_v1<!vhlo.ui32_v1>) -> !vhlo.tensor_v1<!vhlo.ui32_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.ui32_v1>, !vhlo.tensor_v1<!vhlo.ui32_v1>) -> !vhlo.tensor_v1<!vhlo.ui32_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<ui32>, tensor<ui32>) -> tensor<ui32>
- func.return %0 : tensor<ui32>
- }
-
- // CHECK-LABEL: "type_ui64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui64(%arg0: tensor<ui64>, %arg1: tensor<ui64>) -> tensor<ui64> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.ui64_v1>, !vhlo.tensor_v1<!vhlo.ui64_v1>) -> !vhlo.tensor_v1<!vhlo.ui64_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.ui64_v1>, !vhlo.tensor_v1<!vhlo.ui64_v1>) -> !vhlo.tensor_v1<!vhlo.ui64_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<ui64>, tensor<ui64>) -> tensor<ui64>
- func.return %0 : tensor<ui64>
- }
-
- // CHECK-LABEL: "type_f8E4M3FN"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E4M3FN(%arg0: tensor<f8E4M3FN>, %arg1: tensor<f8E4M3FN>) -> tensor<f8E4M3FN> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f8E4M3FN_v1>, !vhlo.tensor_v1<!vhlo.f8E4M3FN_v1>) -> !vhlo.tensor_v1<!vhlo.f8E4M3FN_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f8E4M3FN_v1>, !vhlo.tensor_v1<!vhlo.f8E4M3FN_v1>) -> !vhlo.tensor_v1<!vhlo.f8E4M3FN_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f8E4M3FN>, tensor<f8E4M3FN>) -> tensor<f8E4M3FN>
- func.return %0 : tensor<f8E4M3FN>
- }
-
- // CHECK-LABEL: "type_f8E5M2"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E5M2(%arg0: tensor<f8E5M2>, %arg1: tensor<f8E5M2>) -> tensor<f8E5M2> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f8E5M2_v1>, !vhlo.tensor_v1<!vhlo.f8E5M2_v1>) -> !vhlo.tensor_v1<!vhlo.f8E5M2_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f8E5M2_v1>, !vhlo.tensor_v1<!vhlo.f8E5M2_v1>) -> !vhlo.tensor_v1<!vhlo.f8E5M2_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f8E5M2>, tensor<f8E5M2>) -> tensor<f8E5M2>
- func.return %0 : tensor<f8E5M2>
- }
-
- // CHECK-LABEL: "type_f8E4M3FNUZ"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E4M3FNUZ(%arg0: tensor<f8E4M3FNUZ>, %arg1: tensor<f8E4M3FNUZ>) -> tensor<f8E4M3FNUZ> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f8E4M3FNUZ_v1>, !vhlo.tensor_v1<!vhlo.f8E4M3FNUZ_v1>) -> !vhlo.tensor_v1<!vhlo.f8E4M3FNUZ_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f8E4M3FNUZ_v1>, !vhlo.tensor_v1<!vhlo.f8E4M3FNUZ_v1>) -> !vhlo.tensor_v1<!vhlo.f8E4M3FNUZ_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f8E4M3FNUZ>, tensor<f8E4M3FNUZ>) -> tensor<f8E4M3FNUZ>
- func.return %0 : tensor<f8E4M3FNUZ>
- }
-
- // CHECK-LABEL: "type_f8E4M3B11FNUZ"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E4M3B11FNUZ(%arg0: tensor<f8E4M3B11FNUZ>, %arg1: tensor<f8E4M3B11FNUZ>) -> tensor<f8E4M3B11FNUZ> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f8E4M3B11FNUZ_v1>, !vhlo.tensor_v1<!vhlo.f8E4M3B11FNUZ_v1>) -> !vhlo.tensor_v1<!vhlo.f8E4M3B11FNUZ_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f8E4M3B11FNUZ_v1>, !vhlo.tensor_v1<!vhlo.f8E4M3B11FNUZ_v1>) -> !vhlo.tensor_v1<!vhlo.f8E4M3B11FNUZ_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f8E4M3B11FNUZ>, tensor<f8E4M3B11FNUZ>) -> tensor<f8E4M3B11FNUZ>
- func.return %0 : tensor<f8E4M3B11FNUZ>
- }
-
- // CHECK-LABEL: "type_f8E5M2FNUZ"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E5M2FNUZ(%arg0: tensor<f8E5M2FNUZ>, %arg1: tensor<f8E5M2FNUZ>) -> tensor<f8E5M2FNUZ> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f8E5M2FNUZ_v1>, !vhlo.tensor_v1<!vhlo.f8E5M2FNUZ_v1>) -> !vhlo.tensor_v1<!vhlo.f8E5M2FNUZ_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f8E5M2FNUZ_v1>, !vhlo.tensor_v1<!vhlo.f8E5M2FNUZ_v1>) -> !vhlo.tensor_v1<!vhlo.f8E5M2FNUZ_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f8E5M2FNUZ>, tensor<f8E5M2FNUZ>) -> tensor<f8E5M2FNUZ>
- func.return %0 : tensor<f8E5M2FNUZ>
- }
-
- // CHECK-LABEL: "type_bf16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_bf16(%arg0: tensor<bf16>, %arg1: tensor<bf16>) -> tensor<bf16> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.bf16_v1>, !vhlo.tensor_v1<!vhlo.bf16_v1>) -> !vhlo.tensor_v1<!vhlo.bf16_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.bf16_v1>, !vhlo.tensor_v1<!vhlo.bf16_v1>) -> !vhlo.tensor_v1<!vhlo.bf16_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<bf16>, tensor<bf16>) -> tensor<bf16>
- func.return %0 : tensor<bf16>
- }
-
- // CHECK-LABEL: "type_f16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f16(%arg0: tensor<f16>, %arg1: tensor<f16>) -> tensor<f16> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f16_v1>, !vhlo.tensor_v1<!vhlo.f16_v1>) -> !vhlo.tensor_v1<!vhlo.f16_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f16_v1>, !vhlo.tensor_v1<!vhlo.f16_v1>) -> !vhlo.tensor_v1<!vhlo.f16_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f16>, tensor<f16>) -> tensor<f16>
- func.return %0 : tensor<f16>
- }
-
- // CHECK-LABEL: "type_f32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f32(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<f32> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f32_v1>, !vhlo.tensor_v1<!vhlo.f32_v1>) -> !vhlo.tensor_v1<!vhlo.f32_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f32_v1>, !vhlo.tensor_v1<!vhlo.f32_v1>) -> !vhlo.tensor_v1<!vhlo.f32_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
- func.return %0 : tensor<f32>
- }
-
- // CHECK-LABEL: "type_f64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f64(%arg0: tensor<f64>, %arg1: tensor<f64>) -> tensor<f64> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f64_v1>, !vhlo.tensor_v1<!vhlo.f64_v1>) -> !vhlo.tensor_v1<!vhlo.f64_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f64_v1>, !vhlo.tensor_v1<!vhlo.f64_v1>) -> !vhlo.tensor_v1<!vhlo.f64_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f64>, tensor<f64>) -> tensor<f64>
- func.return %0 : tensor<f64>
- }
-
- // CHECK-LABEL: "type_complex_f32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_complex_f32(%arg0: tensor<complex<f32>>, %arg1: tensor<complex<f32>>) -> tensor<complex<f32>> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f32_v1>>, !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f32_v1>>) -> !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f32_v1>>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f32_v1>>, !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f32_v1>>) -> !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f32_v1>>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<complex<f32>>, tensor<complex<f32>>) -> tensor<complex<f32>>
- func.return %0 : tensor<complex<f32>>
- }
-
- // CHECK-LABEL: "type_complex_f64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_complex_f64(%arg0: tensor<complex<f64>>, %arg1: tensor<complex<f64>>) -> tensor<complex<f64>> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f64_v1>>, !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f64_v1>>) -> !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f64_v1>>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f64_v1>>, !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f64_v1>>) -> !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f64_v1>>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<complex<f64>>, tensor<complex<f64>>) -> tensor<complex<f64>>
- func.return %0 : tensor<complex<f64>>
- }
-
- // CHECK-LABEL: "type_dynamism_ranked"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_dynamism_ranked(%arg0: tensor<?xf32>) -> tensor<?xf32> {
-- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1<?x!vhlo.f32_v1>) -> !vhlo.tensor_v1<?x!vhlo.f32_v1>
-+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1<?x!vhlo.f32_v1>) -> !vhlo.tensor_v1<?x!vhlo.f32_v1>
- %0 = "stablehlo.abs"(%arg0) : (tensor<?xf32>) -> tensor<?xf32>
- func.return %0 : tensor<?xf32>
- }
-
- // CHECK-LABEL: "type_quantization"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_quantization(%arg0: tensor>, %arg1: tensor>) -> tensor> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor>
- func.return %0 : tensor>
- }
-
- // CHECK: function_type = #vhlo.type_v1<!vhlo.func_v1<(!vhlo.token_v1) -> !vhlo.token_v1>>
- // CHECK-LABEL: "type_token_callee"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_token_callee(%arg0: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.token_v1) -> ()
-+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> ()
- return %arg0 : !stablehlo.token
- }
-
- // CHECK: function_type = #vhlo.type_v1<!vhlo.func_v1<(!vhlo.token_v1) -> !vhlo.token_v1>>
- // CHECK-LABEL: "type_token_caller"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_token_caller(%arg0: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.call_v1"(%arg0) <{callee = #vhlo.string_v1<"type_token_callee">}
-+ // CHECK: "vhlo.call_v1"(%[[ARG0]]) <{callee = #vhlo.string_v1<"type_token_callee">}
- // CHECK-SAME: (!vhlo.token_v1) -> !vhlo.token_v1
- %0 = func.call @type_token_callee(%arg0) : (!stablehlo.token) -> !stablehlo.token
- return %0 : !stablehlo.token
- }
-
- // CHECK-LABEL: "type_tuple"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_tuple(%arg0: tuple<tensor<f32>>) -> tuple {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo"
-diff --ruN a/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_18_0.mlir b/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_18_0.mlir
---- stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_18_0.mlir
-+++ stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_18_0.mlir
-@@ -13,6 +13,7 @@
- // ============ ATTRIBUTES ============
-
- // CHECK-LABEL: "attr_comparison_direction_eq"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_eq(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<i1> {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo<comparison_direction EQ>
-@@ -22,6 +23,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_ne"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_ne(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<i1> {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo<comparison_direction NE>
-@@ -31,6 +33,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_ge"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_ge(%arg0: tensor<f32>, %arg1: 
tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -40,6 +43,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_gt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_gt(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -49,6 +53,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_le" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_le(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -58,6 +63,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_lt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_lt(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -67,6 +73,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_notype" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_notype(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo -@@ -76,6 +83,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_float" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_float(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -86,6 +94,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_totalorder" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_totalorder(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -96,6 +105,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_signed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_signed(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -106,6 +116,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_unsigned" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_unsigned(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -118,6 +129,7 @@ - // ConvDimensionNumbers aka #stablehlo.conv is covered below. 
- - // CHECK-LABEL: "attr_custom_call_api_version_unspecified" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_unspecified(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -128,6 +140,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_original" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_original(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -138,6 +151,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_status_returning" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_status_returning(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -148,6 +162,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_status_returning_unified" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_status_returning_unified(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -166,6 +181,7 @@ - // DotDimensionNumbers aka #stablehlo.dot is covered below. - - // CHECK-LABEL: "attr_fft_type_fft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -176,6 +192,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_ifft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_ifft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -186,6 +203,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_rfft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_rfft(%arg0: tensor<16xf32>) -> tensor<9xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -196,6 +214,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_irfft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_irfft(%arg0: tensor<9xcomplex>) -> tensor<16xf32> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -208,6 +227,7 @@ - // GatherDimensionNumbers aka #stablehlo.gather is covered below. 
- - // CHECK-LABEL: "attr_precision_config_default" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_default(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -216,6 +236,7 @@ - } - - // CHECK-LABEL: "attr_precision_config_high" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_high(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -225,6 +246,7 @@ - } - - // CHECK-LABEL: "attr_precision_config_highest" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_highest(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -234,6 +256,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_default" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_default(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -243,6 +266,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_three_fry" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_three_fry(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -252,6 +276,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_philox" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_philox(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -261,6 +286,7 @@ - } - - // CHECK-LABEL: "attr_rng_distribution_uniform" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @attr_rng_distribution_uniform(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { - %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) { - // CHECK: rng_distribution = #vhlo -@@ -270,6 +296,7 @@ - } - - // CHECK-LABEL: "attr_rng_distribution_normal" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @attr_rng_distribution_normal(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { - %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) { - // CHECK: rng_distribution = #vhlo -@@ -281,6 +308,7 @@ - // ScatterDimensionNumbers aka #stablehlo.scatter is covered below. 
- - // CHECK-LABEL: "attr_transpose_no_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_no_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -293,6 +321,7 @@ - } - - // CHECK-LABEL: "attr_transpose_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -305,6 +334,7 @@ - } - - // CHECK-LABEL: "attr_transpose_adjoint" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_adjoint(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -319,10 +349,9 @@ - // TypeExtensionsAttr aka #stablehlo.type_extensions is covered below. - - // CHECK-LABEL: "attr_type_extensions_bounds" --func.func @attr_type_extensions_bounds( -- %arg0: tensor>) -- -> tensor> { -- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1>) -> () -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) -+func.func @attr_type_extensions_bounds(%arg0: tensor>) -> tensor> { -+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> () - func.return %arg0 : tensor> - } - -@@ -330,8 +359,9 @@ - // ============ DEFAULTS ============ - - // CHECK-LABEL: "default_all_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.all_gather_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{ - // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64> - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -345,8 +375,9 @@ - } - - // CHECK-LABEL: "default_all_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_reduce(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.all_reduce_v1"(%arg0) -+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]]) - // CHECK-SAME: <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -368,8 +399,9 @@ - } - - // CHECK-LABEL: "default_all_to_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> { -- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>, -@@ -386,8 +418,9 @@ - } - - // CHECK-LABEL: "default_cholesky" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> { -- // CHECK: "vhlo.cholesky_v1"(%arg0) <{ -+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{ - // CHECK-SAME: lower = #vhlo.bool_v1 - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1> - %0 = "stablehlo.cholesky"(%arg0) : (tensor<1x16x16xf32>) -> tensor<1x16x16xf32> -@@ -395,8 +428,9 @@ - } - - // CHECK-LABEL: "default_collective_permute" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: 
"vhlo.collective_permute_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -407,8 +441,9 @@ - } - - // CHECK-LABEL: "default_collective_broadcast" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_collective_broadcast(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: "vhlo.collective_broadcast_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_broadcast_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -419,8 +454,9 @@ - } - - // CHECK-LABEL: "default_compare" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_compare(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: compare_type = #vhlo, - // CHECK-SAME: comparison_direction = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -431,8 +467,9 @@ - } - - // CHECK-LABEL: "default_convolution" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x6x6x16xf32> { -- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -460,8 +497,9 @@ - } - - // CHECK-LABEL: "default_custom_call" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_custom_call(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.custom_call_v1"(%arg0) <{ -+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{ - // CHECK-SAME: api_version = #vhlo, - // CHECK-SAME: backend_config = #vhlo.string_v1<"">, - // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">, -@@ -478,8 +516,9 @@ - } - - // CHECK-LABEL: "default_dot_general" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> { -- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, -@@ -498,8 +537,9 @@ - } - - // CHECK-LABEL: "default_dot" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.dot"(%arg0, %arg1) : (tensor<8x16xf32>, tensor<16x8xf32>) -> tensor<8x8xf32> -@@ -507,8 +547,9 @@ - } - - // CHECK-LABEL: 
"default_dynamic_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>>, - // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>> -@@ -520,8 +561,9 @@ - } - - // CHECK-LABEL: "default_dynamic_conv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> { -- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -549,8 +591,9 @@ - } - - // CHECK-LABEL: "default_dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> { -- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -576,15 +619,16 @@ - // CHECK-SAME: sym_name = #vhlo.string_v1<"default_func">, - // CHECK-SAME: sym_visibility = #vhlo.string_v1<""> - // CHECK-SAME: }> ({ -- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1): -- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1): -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : () -> () - func.return %arg0 : tensor - } - - // CHECK-LABEL: "dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> { -- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -605,8 +649,9 @@ - } - - // CHECK-LABEL: "default_infeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.infeed_v1"(%arg0) <{ -+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{ - // CHECK-SAME: infeed_config = #vhlo.string_v1<"">, - // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[]> - // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) -@@ -615,8 +660,9 @@ - } - - // CHECK-LABEL: "default_outfeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], 
%[[ARG1]]) <{ - // CHECK-SAME: outfeed_config = #vhlo.string_v1<""> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.outfeed"(%arg0, %arg1) : (tensor, !stablehlo.token) -> !stablehlo.token -@@ -624,8 +670,9 @@ - } - - // CHECK-LABEL: "default_recv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.recv_v1"(%arg0) <{ -+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -637,8 +684,9 @@ - } - - // CHECK-LABEL: "default_send" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -650,8 +698,9 @@ - } - - // CHECK-LABEL: "default_reduce_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64> -@@ -673,8 +722,9 @@ - } - - // CHECK-LABEL: "default_reduce_window" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x16x30x7xf32> { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, -@@ -696,8 +746,9 @@ - } - - // CHECK-LABEL: "default_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> { -- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, - // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>, -@@ -725,8 +776,9 @@ - } - - // CHECK-LABEL: "default_select_and_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<10x23x23x64xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> { -- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>> -@@ -754,8 +806,9 @@ - } - - // CHECK-LABEL: "default_sort" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func 
@default_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.sort_v1"(%arg0) <{ -+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<-1 : i64> - // CHECK-SAME: is_stable = #vhlo.bool_v1 - // CHECK-SAME: }> ({ -@@ -774,29 +827,33 @@ - // ============ OPS ============ - - // CHECK-LABEL: "op_abs" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_abs(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_add" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_add(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_after_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_after_all(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.after_all_v1"(%arg0) : (!vhlo.token_v1) -> !vhlo.token_v1 -+ // CHECK: "vhlo.after_all_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.after_all"(%arg0) : (!stablehlo.token) -> !stablehlo.token - func.return %0 : !stablehlo.token - } - - // CHECK-LABEL: "op_all_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.all_gather_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{ - // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64> - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -812,8 +869,9 @@ - } - - // CHECK-LABEL: "op_all_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_reduce(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.all_reduce_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: use_global_device_ids = #vhlo.bool_v1 -@@ -836,7 +894,7 @@ - - // CHECK-LABEL: "op_all_reduce_with_promotable_types" - func.func @op_all_reduce_with_promotable_types(%operand: tensor) -> tensor { -- // CHECK: "vhlo.all_reduce_v1"(%arg0) -+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0:.*]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -854,8 +912,9 @@ - } - - // CHECK-LABEL: "op_all_to_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> { -- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>, -@@ -873,22 +932,25 @@ - } - - // CHECK-LABEL: "op_and" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_and(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: 
"vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_atan2" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_atan2(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.atan2_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.atan2_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.atan2"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_batch_norm_grad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_batch_norm_grad(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16x16x16x16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { -- // CHECK: "vhlo.batch_norm_grad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ -+ // CHECK: "vhlo.batch_norm_grad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -@@ -900,8 +962,9 @@ - } - - // CHECK-LABEL: "op_batch_norm_inference" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_batch_norm_inference(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16xf32>) -> tensor<16x16x16x16xf32> { -- // CHECK: "vhlo.batch_norm_inference_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ -+ // CHECK: "vhlo.batch_norm_inference_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1> -@@ -913,8 +976,9 @@ - } - - // CHECK-LABEL: "op_batch_norm_training" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_batch_norm_training(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { -- // CHECK: "vhlo.batch_norm_training_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.batch_norm_training_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, 
!vhlo.tensor_v1<16x!vhlo.f32_v1>) -@@ -926,15 +990,17 @@ - } - - // CHECK-LABEL: "op_bitcast_convert" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_bitcast_convert(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.bitcast_convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.bitcast_convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.bitcast_convert"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_broadcast_in_dim(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.broadcast_in_dim_v1"(%arg0) <{ -+ // CHECK: "vhlo.broadcast_in_dim_v1"(%[[ARG0]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1> - %0 = "stablehlo.broadcast_in_dim"(%arg0) { -@@ -944,8 +1010,9 @@ - } - - // CHECK-LABEL: "op_broadcast" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_broadcast(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.broadcast_v1"(%arg0) <{ -+ // CHECK: "vhlo.broadcast_v1"(%[[ARG0]]) <{ - // CHECK-SAME: broadcast_sizes = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1> - %0 = "stablehlo.broadcast"(%arg0) { -@@ -955,9 +1022,10 @@ - } - - // CHECK-LABEL: "op_case" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_case(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.case_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.case"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -966,22 +1034,25 @@ - } - - // CHECK-LABEL: "op_cbrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cbrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cbrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.cbrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cbrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_ceil" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_ceil(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.ceil_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.ceil_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.ceil"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_cholesky" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> { -- // CHECK: "vhlo.cholesky_v1"(%arg0) <{ -+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{ - // CHECK-SAME: lower = #vhlo.bool_v1 - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1> - %0 = "stablehlo.cholesky"(%arg0) { -@@ -991,22 +1062,25 @@ - } - - // CHECK-LABEL: "op_clamp" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_clamp(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.clamp_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.clamp_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, 
!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.clamp"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_count_leading_zeros" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_count_leading_zeros(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.count_leading_zeros_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.count_leading_zeros_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.count_leading_zeros"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_collective_permute" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: "vhlo.collective_permute_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -1018,8 +1092,9 @@ - } - - // CHECK-LABEL: "op_compare" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_compare(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: compare_type = #vhlo, - // CHECK-SAME: comparison_direction = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -1031,15 +1106,17 @@ - } - - // CHECK-LABEL: "op_complex" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_complex(%arg0: tensor, %arg1: tensor) -> tensor> { -- // CHECK: "vhlo.complex_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.complex_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1> - %0 = "stablehlo.complex"(%arg0, %arg1) : (tensor, tensor) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "op_concatenate" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_concatenate(%arg0: tensor<8xf32>, %arg1: tensor<8xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.concatenate_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.concatenate_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x!vhlo.f32_v1>, !vhlo.tensor_v1<8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.concatenate"(%arg0, %arg1) { -@@ -1049,6 +1126,7 @@ - } - - // CHECK-LABEL: "op_constant" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_constant(%arg0: tensor) -> tensor { - // CHECK: "vhlo.constant_v1"() <{ - // CHECK-SAME: value = #vhlo.tensor_v1 : tensor> -@@ -1060,15 +1138,17 @@ - } - - // CHECK-LABEL: "op_convert" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_convert(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.convert"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_convolution" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x7x7x16xf32> { -- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: 
batch_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>,
-@@ -1102,8 +1182,9 @@
- }
-
- // CHECK-LABEL: "op_cosine"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_cosine(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.cosine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.cosine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.cosine"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-@@ -1116,8 +1197,9 @@
- }
-
- // CHECK-LABEL: "op_cross_replica_sum"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_cross_replica_sum(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.cross-replica-sum_v1"(%arg0) <{
-+ // CHECK: "vhlo.cross-replica-sum_v1"(%[[ARG0]]) <{
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.cross-replica-sum"(%arg0) {
-@@ -1127,8 +1209,9 @@
- }
-
- // CHECK-LABEL: "op_custom_call"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_custom_call(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.custom_call_v1"(%arg0) <{
-+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{
- // CHECK-SAME: api_version = #vhlo,
- // CHECK-SAME: backend_config = #vhlo.string_v1<"\08\03\1A\02">,
- // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">,
-@@ -1159,15 +1242,17 @@
- }
-
- // CHECK-LABEL: "op_divide"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_divide(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.divide_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.divide_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.divide"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_dot_general"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> {
-- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>,
-@@ -1187,8 +1272,9 @@
- }
-
- // CHECK-LABEL: "op_dot"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
-- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1>
- %0 = "stablehlo.dot"(%arg0, %arg1) {
-@@ -1198,8 +1284,9 @@
- }
-
- // CHECK-LABEL: "op_dynamic_broadcast_in_dim"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor {
-- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>,
- // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>
-@@ -1213,8 +1300,9 @@
- }
-
- // CHECK-LABEL: "op_dynamic_conv"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> {
-- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>,
-@@ -1248,8 +1336,9 @@
- }
-
- // CHECK-LABEL: "op_dynamic_gather"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> {
-- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
- // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>,
- // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
-@@ -1269,8 +1358,9 @@
- }
-
- // CHECK-LABEL: "op_dynamic_iota"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_dynamic_iota(%arg0: tensor<1xindex>) -> tensor {
-- // CHECK: "vhlo.dynamic_iota_v1"(%arg0) <{
-+ // CHECK: "vhlo.dynamic_iota_v1"(%[[ARG0]]) <{
- // CHECK-SAME: iota_dimension = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.dynamic_iota"(%arg0) {
-@@ -1280,22 +1370,25 @@
- }
-
- // CHECK-LABEL: "op_dynamic_pad"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}})
- func.func @op_dynamic_pad(%arg0: tensor, %arg1: tensor, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>, %arg4: tensor<1xindex>) -> tensor {
-- // CHECK: "vhlo.dynamic_pad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.dynamic_pad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.dynamic_pad"(%arg0, %arg1, %arg2, %arg3, %arg4) : (tensor, tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_dynamic_reshape"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_dynamic_reshape(%arg0: tensor<16xf32>, %arg1: tensor<2xindex>) -> tensor {
-- // CHECK: "vhlo.dynamic_reshape_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.dynamic_reshape_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.dynamic_reshape"(%arg0, %arg1) : (tensor<16xf32>, tensor<2xindex>) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_dynamic_slice"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_dynamic_slice(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor<4xf32> {
-- // CHECK: "vhlo.dynamic_slice_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.dynamic_slice_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: slice_sizes = #vhlo.tensor_v1 : tensor<1xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<4x!vhlo.f32_v1>
- %0 = "stablehlo.dynamic_slice"(%arg0, %arg1) {
-@@ -1305,15 +1398,17 @@
- }
-
- // CHECK-LABEL: "op_dynamic_update_slice"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_dynamic_update_slice(%arg0: tensor<16xf32>, %arg1: tensor<4xf32>, %arg2: tensor) -> tensor<16xf32> {
-- // CHECK: "vhlo.dynamic_update_slice_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
-+ // CHECK: "vhlo.dynamic_update_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
- %0 = "stablehlo.dynamic_update_slice"(%arg0, %arg1, %arg2) : (tensor<16xf32>, tensor<4xf32>, tensor) -> tensor<16xf32>
- func.return %0 : tensor<16xf32>
- }
-
- // CHECK-LABEL: "op_einsum"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_einsum(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
-- // CHECK: "vhlo.einsum_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.einsum_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab,bc->ac">
- // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1>
- %0 = "stablehlo.einsum"(%arg0, %arg1) {
-@@ -1323,22 +1418,25 @@
- }
-
- // CHECK-LABEL: "op_exponential_minus_one"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_exponential_minus_one(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.exponential_minus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.exponential_minus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.exponential_minus_one"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_exponential"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_exponential(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.exponential_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.exponential_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.exponential"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_fft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> {
-- // CHECK: "vhlo.fft_v1"(%arg0) <{
-+ // CHECK: "vhlo.fft_v1"(%[[ARG0]]) <{
- // CHECK-SAME: fft_length = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: fft_type = #vhlo
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.complex_v1>) -> !vhlo.tensor_v1<16x!vhlo.complex_v1>
-@@ -1350,8 +1448,9 @@
- }
-
- // CHECK-LABEL: "op_floor"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_floor(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.floor_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.floor_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.floor"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-@@ -1364,16 +1463,17 @@
- // CHECK-SAME: sym_name = #vhlo.string_v1<"op_func">,
- // CHECK-SAME: sym_visibility = #vhlo.string_v1<"private">
- // CHECK-SAME: }> ({
-- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1):
-- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> ()
-+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1):
-+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }) : () -> ()
-
- func.return %arg0 : tensor
- }
-
- // CHECK-LABEL: "op_gather"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> {
-- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
- // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>,
- // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
-@@ -1395,8 +1495,9 @@
- }
-
- // CHECK-LABEL: "op_get_dimension_size"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_get_dimension_size(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.get_dimension_size_v1"(%arg0) <{
-+ // CHECK: "vhlo.get_dimension_size_v1"(%[[ARG0]]) <{
- // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.get_dimension_size"(%arg0) {
-@@ -1406,8 +1507,9 @@
- }
-
- // CHECK-LABEL: "op_get_tuple_element"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_get_tuple_element(%arg0: tuple, tensor>) -> tensor {
-- // CHECK: "vhlo.get_tuple_element_v1"(%arg0) <{
-+ // CHECK: "vhlo.get_tuple_element_v1"(%[[ARG0]]) <{
- // CHECK-SAME: index = #vhlo.integer_v1<0 : i32>
- // CHECK-SAME: }> : (!vhlo.tuple_v1, !vhlo.tensor_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.get_tuple_element"(%arg0) {
-@@ -1417,11 +1519,12 @@
- }
-
- // CHECK-LABEL: "op_if"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_if(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor {
-- // CHECK: "vhlo.if_v1"(%arg0) ({
-- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> ()
-+ // CHECK: "vhlo.if_v1"(%[[ARG0]]) ({
-+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }, {
-- // CHECK-NEXT: "vhlo.return_v1"(%arg2) : (!vhlo.tensor_v1) -> ()
-+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG2]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.if"(%arg0) ({
- "stablehlo.return"(%arg1) : (tensor) -> ()
-@@ -1432,15 +1535,17 @@
- }
-
- // CHECK-LABEL: "op_imag"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_imag(%arg0: tensor>) -> tensor {
-- // CHECK: "vhlo.imag_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.imag_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.imag"(%arg0) : (tensor>) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_infeed"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) {
-- // CHECK: "vhlo.infeed_v1"(%arg0) <{
-+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{
- // CHECK-SAME: infeed_config = #vhlo.string_v1<"foo">,
- // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[#vhlo.array_v1<[]>]>
- // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1)
-@@ -1463,36 +1568,41 @@
- }
-
- // CHECK-LABEL: "op_is_finite"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_is_finite(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.is_finite_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.is_finite_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.is_finite"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_log"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_log(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.log_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.log_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.log"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_log_plus_one"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_log_plus_one(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.log_plus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.log_plus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.log_plus_one"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_logistic"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_logistic(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.logistic_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.logistic_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.logistic"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_map"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_map(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.map_v1"(%arg0) <{
-+ // CHECK: "vhlo.map_v1"(%[[ARG0]]) <{
- // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>>
- // CHECK-SAME: }> ({
- // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1):
-@@ -1510,57 +1620,65 @@
- }
-
- // CHECK-LABEL: "op_maximum"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_maximum(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.maximum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.maximum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.maximum"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_minimum"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_minimum(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.minimum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.minimum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.minimum"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_multiply"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_multiply(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.multiply_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.multiply_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.multiply"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_negate"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_negate(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.negate_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.negate_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.negate"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_not"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_not(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.not_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.not_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.not"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_optimization_barrier"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_optimization_barrier(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.optimization_barrier_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.optimization_barrier_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.optimization_barrier"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_or"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_or(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.or_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.or_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.or"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_outfeed"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: outfeed_config = #vhlo.string_v1<"foo">
- // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1
- %0 = "stablehlo.outfeed"(%arg0, %arg1) {
-@@ -1570,8 +1688,9 @@
- }
-
- // CHECK-LABEL: "op_pad"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_pad(%arg0: tensor<8xf32>, %arg1: tensor) -> tensor<16xf32> {
-- // CHECK: "vhlo.pad_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.pad_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: edge_padding_high = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: edge_padding_low = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: interior_padding = #vhlo.tensor_v1 : tensor<1xi64>>
-@@ -1585,36 +1704,41 @@
- }
-
- // CHECK-LABEL: "op_popcnt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_popcnt(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.popcnt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.popcnt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.popcnt"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_power"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_power(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.power_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.power_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.power"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_real_dynamic_slice"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}})
- func.func @op_real_dynamic_slice(%arg0: tensor, %arg1: tensor<1xindex>, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>) -> tensor {
-- // CHECK: "vhlo.real_dynamic_slice_v1"(%arg0, %arg1, %arg2, %arg3) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.real_dynamic_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.real_dynamic_slice"(%arg0, %arg1, %arg2, %arg3) : (tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_real"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_real(%arg0: tensor>) -> tensor {
-- // CHECK: "vhlo.real_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.real_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.real"(%arg0) : (tensor>) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_recv"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) {
-- // CHECK: "vhlo.recv_v1"(%arg0) <{
-+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME: channel_type = #vhlo.integer_v1<3 : i64>,
- // CHECK-SAME: is_host_transfer = #vhlo.bool_v1
-@@ -1627,8 +1751,9 @@
- }
-
- // CHECK-LABEL: "op_reduce"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_reduce(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.reduce_v1"(%arg0, %arg1)
-+ // CHECK: "vhlo.reduce_v1"(%[[ARG0]], %[[ARG1]])
- // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1):
- // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> ()
- // CHECK: }) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-@@ -1643,8 +1768,9 @@
- }
-
- // CHECK-LABEL: "op_reduce_precision"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_reduce_precision(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.reduce_precision_v1"(%arg0) <{
-+ // CHECK: "vhlo.reduce_precision_v1"(%[[ARG0]]) <{
- // CHECK-SAME: exponent_bits = #vhlo.integer_v1<8 : i32>
- // CHECK-SAME: mantissa_bits = #vhlo.integer_v1<10 : i32>
- // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-@@ -1658,7 +1784,7 @@
- // CHECK_lABEL: "op_reduce_with_promotable_types"
- func.func @op_reduce_with_promotable_types(%arg0: tensor<4x4xf32>, %arg1 : tensor)
- -> (tensor<4xf64>) {
-- // CHECK: "vhlo.reduce_v1"(%arg0, %arg1)
-+ // CHECK: "vhlo.reduce_v1"(%[[ARG0:.*]], %[[ARG1:.*]])
- // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1):
- // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> ()
- // CHECK: }) : (!vhlo.tensor_v1<4x4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<4x!vhlo.f64_v1>
-@@ -1673,8 +1799,9 @@
- }
-
- // CHECK-LABEL: "op_reduce_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{
-+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
- // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64>
-@@ -1699,7 +1826,7 @@
-
- // CHECK_lABEL: "op_reduce_scatter_with_promotable_types"
- func.func @op_reduce_scatter_with_promotable_types(%data: tensor<4x16xf32>) -> tensor<4x4xf64> {
-- // CHECK: "vhlo.reduce_scatter_v1"(%arg0)
-+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0:.*]])
- // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1):
- // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> ()
- // CHECK: }) : (!vhlo.tensor_v1<4x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f64_v1>
-@@ -1716,8 +1843,9 @@
-
-
- // CHECK-LABEL: "op_reduce_window"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x9x16x7xf32> {
-- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>,
- // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>,
- // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>,
-@@ -1746,7 +1874,7 @@
- func.func @op_reduce_window_with_promotable_types(%arg0: tensor<4x2xf32>,
- %arg1: tensor<4x2xf32>, %init0: tensor, %init1: tensor) ->
- (tensor<2x2xf64>, tensor<2x2xf32>) {
-- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1, %arg2, %arg3)
-+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0:.*]], %[[ARG1:.*]], %[[ARG2:.*]], %[[ARG3:.*]])
- // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1, %[[ARG3:arg.*]]: !vhlo.tensor_v1, %[[ARG4:arg.*]]: !vhlo.tensor_v1):
- // CHECK: "vhlo.return_v1"(%[[VAL1:.*]], %[[VAL2:.*]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> ()
- // CHECK: }) : (!vhlo.tensor_v1<4x2x!vhlo.f32_v1>, !vhlo.tensor_v1<4x2x!vhlo.f32_v1>, !vhlo.tensor_v1, !vhlo.tensor_v1) -> (!vhlo.tensor_v1<2x2x!vhlo.f64_v1>, !vhlo.tensor_v1<2x2x!vhlo.f32_v1>)
-@@ -1766,8 +1894,9 @@
- }
-
- // CHECK-LABEL: "op_remainder"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_remainder(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.remainder_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.remainder_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.remainder"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-@@ -1787,16 +1916,18 @@
- }
-
- // CHECK-LABEL: "op_reshape"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_reshape(%arg0: tensor<16xf32>) -> tensor<4x4xf32> {
-- // CHECK: "vhlo.reshape_v1"(%arg0) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1>
-+ // CHECK: "vhlo.reshape_v1"(%[[ARG0]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1>
- %0 = "stablehlo.reshape"(%arg0) : (tensor<16xf32>) -> tensor<4x4xf32>
- func.return %0 : tensor<4x4xf32>
- }
-
- // CHECK-LABEL: "op_return"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_return(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.case_v1"(%arg0) ({
-- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> ()
-+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({
-+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.case"(%arg0) ({
- "stablehlo.return"(%arg1) : (tensor) -> ()
-@@ -1805,8 +1936,9 @@
- }
-
- // CHECK-LABEL: "op_reverse"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_reverse(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.reverse_v1"(%arg0) <{
-+ // CHECK: "vhlo.reverse_v1"(%[[ARG0]]) <{
- // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
- %0 = "stablehlo.reverse"(%arg0) {
-@@ -1816,8 +1948,9 @@
- }
-
- // CHECK-LABEL: "op_rng_bit_generator"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_rng_bit_generator(%arg0: tensor) -> (tensor, tensor) {
-- // CHECK: "vhlo.rng_bit_generator_v1"(%arg0) <{
-+ // CHECK: "vhlo.rng_bit_generator_v1"(%[[ARG0]]) <{
- // CHECK-SAME: rng_algorithm = #vhlo
- // CHECK-SAME: }> : (!vhlo.tensor_v1) -> (!vhlo.tensor_v1, !vhlo.tensor_v1)
- %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
-@@ -1827,8 +1960,9 @@
- }
-
- // CHECK-LABEL: "op_rng"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_rng(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor {
-- // CHECK: "vhlo.rng_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.rng_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: rng_distribution = #vhlo
- // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<0x!vhlo.index_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) {
-@@ -1838,29 +1972,33 @@
- }
-
- // CHECK-LABEL: "op_round_nearest_afz"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_round_nearest_afz(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.round_nearest_afz_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.round_nearest_afz_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.round_nearest_afz"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_round_nearest_even"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_round_nearest_even(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.round_nearest_even_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.round_nearest_even_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.round_nearest_even"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_rsqrt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_rsqrt(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.rsqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.rsqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.rsqrt"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> {
-- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
- // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
-@@ -1893,7 +2031,7 @@
- func.func @op_scatter_with_promotable_types(%input_tensor: tensor<200x100x300xf32>,
- %scatter_indices: tensor<10x2xi32>, %updates: tensor<10x300xf32>) ->
- tensor<200x100x300xf64> {
-- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2)
-+ // CHECK: "vhlo.scatter_v1"(%[[ARG0:.*]], %[[ARG1:.*]], %[[ARG2:.*]])
- // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1):
- // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> ()
- // CHECK: }) : (!vhlo.tensor_v1<200x100x300x!vhlo.f32_v1>, !vhlo.tensor_v1<10x2x!vhlo.i32_v1>, !vhlo.tensor_v1<10x300x!vhlo.f32_v1>) -> !vhlo.tensor_v1<200x100x300x!vhlo.f64_v1>
-@@ -1916,8 +2054,9 @@
- }
-
- // CHECK-LABEL: "op_select_and_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<12x13x13x66xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> {
-- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>,
- // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>,
- // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>>
-@@ -1947,8 +2086,9 @@
- }
-
- // CHECK-LABEL: "op_select_and_scatter_with_promotable_types"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_select_and_scatter_with_promotable_types(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<12x13x13x66xf32>, %arg2: tensor) -> tensor<10x24x24x64xf64> {
-- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2)
-+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]])
- // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1):
- // CHECK: %[[VAL:.*]] = "vhlo.add_v1"(%[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- // CHECK: "vhlo.return_v1"(%[[VAL]]) : (!vhlo.tensor_v1) -> ()
-@@ -1970,15 +2110,17 @@
- }
-
- // CHECK-LABEL: "op_select"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_select(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor {
-- // CHECK: "vhlo.select_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.select_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.select"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_send"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME: channel_type = #vhlo.integer_v1<2 : i64>,
- // CHECK-SAME: is_host_transfer = #vhlo.bool_v1
-@@ -1991,8 +2133,9 @@
- }
-
- // CHECK-LABEL: "op_set_dimension_size"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_set_dimension_size(%arg0: tensor, %arg1: tensor) -> tensor<16xf32> {
-- // CHECK: "vhlo.set_dimension_size_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.set_dimension_size_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
- %0 = "stablehlo.set_dimension_size"(%arg0, %arg1) {
-@@ -2002,43 +2145,49 @@
- }
-
- // CHECK-LABEL: "op_shift_left"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_shift_left(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.shift_left_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.shift_left_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.shift_left"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_shift_right_arithmetic"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_shift_right_arithmetic(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.shift_right_arithmetic_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.shift_right_arithmetic_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.shift_right_arithmetic"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_shift_right_logical"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_shift_right_logical(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.shift_right_logical_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.shift_right_logical_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.shift_right_logical"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_sign"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sign(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.sign_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.sign_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.sign"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_sine"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sine(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.sine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.sine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.sine"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_slice"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_slice(%arg0: tensor<16xf32>) -> tensor<4xf32> {
-- // CHECK: "vhlo.slice_v1"(%arg0) <{
-+ // CHECK: "vhlo.slice_v1"(%[[ARG0]]) <{
- // CHECK-SAME: limit_indices = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: start_indices = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: strides = #vhlo.tensor_v1 : tensor<1xi64>>
-@@ -2052,8 +2201,9 @@
- }
-
- // CHECK-LABEL: "op_sort"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.sort_v1"(%arg0) <{
-+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{
- // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: is_stable = #vhlo.bool_v1
- // CHECK-SAME: }> ({
-@@ -2073,29 +2223,33 @@
- }
-
- // CHECK-LABEL: "op_sqrt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sqrt(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.sqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.sqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.sqrt"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_subtract"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_subtract(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.subtract_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.subtract_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.subtract"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_tanh"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_tanh(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.tanh_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.tanh_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.tanh"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_torch_index_select"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_torch_index_select(%arg0: tensor<5x1x5xf32>, %arg1: tensor<2xi32>) -> tensor<2x1x5xf32> {
-- // CHECK: "vhlo.torch_index_select_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.torch_index_select_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: batch_dims = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: dim = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<5x1x5x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.i32_v1>) -> !vhlo.tensor_v1<2x1x5x!vhlo.f32_v1>
-@@ -2107,8 +2261,9 @@
- }
-
- // CHECK-LABEL: "op_trace"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_trace(%arg0: tensor) {
-- // CHECK: "vhlo.trace_v1"(%arg0) <{
-+ // CHECK: "vhlo.trace_v1"(%[[ARG0]]) <{
- // CHECK-SAME: tag = #vhlo.string_v1<"foo">
- // CHECK-SAME: }> : (!vhlo.tensor_v1) -> ()
- "stablehlo.trace"(%arg0) {
-@@ -2118,8 +2273,9 @@
- }
-
- // CHECK-LABEL: "op_transpose"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_transpose(%arg0: tensor<16x8xf32>) -> tensor<8x16xf32> {
-- // CHECK: "vhlo.transpose_v1"(%arg0) <{
-+ // CHECK: "vhlo.transpose_v1"(%[[ARG0]]) <{
- // CHECK-SAME: permutation = #vhlo.tensor_v1 : tensor<2xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x16x!vhlo.f32_v1>
- %0 = "stablehlo.transpose"(%arg0) {
-@@ -2129,8 +2285,9 @@
- }
-
- // CHECK-LABEL: "op_triangular_solve"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_triangular_solve(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
-- // CHECK: "vhlo.triangular_solve_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.triangular_solve_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: left_side = #vhlo.bool_v1,
- // CHECK-SAME: lower = #vhlo.bool_v1,
- // CHECK-SAME: transpose_a = #vhlo,
-@@ -2146,15 +2303,17 @@
- }
-
- // CHECK-LABEL: "op_tuple"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_tuple(%arg0: tensor) -> tuple> {
-- // CHECK: "vhlo.tuple_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1>
-+ // CHECK: "vhlo.tuple_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1>
- %0 = "stablehlo.tuple"(%arg0) : (tensor) -> tuple>
- func.return %0 : tuple>
- }
-
- // CHECK-LABEL: "op_unary_einsum"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_unary_einsum(%arg0: tensor<8x16xf32>) -> tensor<8xf32> {
-- // CHECK: "vhlo.unary_einsum_v1"(%arg0) <{
-+ // CHECK: "vhlo.unary_einsum_v1"(%[[ARG0]]) <{
- // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab->a">
- // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x!vhlo.f32_v1>
- %0 = "stablehlo.unary_einsum"(%arg0) {
-@@ -2164,22 +2323,25 @@
- }
-
- // CHECK-LABEL: "op_uniform_dequantize"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_uniform_dequantize(%arg0: tensor>) -> tensor {
-- // CHECK: "vhlo.uniform_dequantize_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.uniform_dequantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.uniform_dequantize"(%arg0) : (tensor>) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_uniform_quantize"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_uniform_quantize(%arg0: tensor) -> tensor> {
-- // CHECK: "vhlo.uniform_quantize_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1>
-+ // CHECK: "vhlo.uniform_quantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1>
- %0 = "stablehlo.uniform_quantize"(%arg0) : (tensor) -> tensor>
- func.return %0 : tensor>
- }
-
- // CHECK-LABEL: "op_while"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_while(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.while_v1"(%arg0) ({
-+ // CHECK: "vhlo.while_v1"(%[[ARG0]]) ({
- // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1):
- // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }, {
-@@ -2197,8 +2359,9 @@
- }
-
- // CHECK-LABEL: "op_xor"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_xor(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.xor_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.xor_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.xor"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-@@ -2206,197 +2369,225 @@
- // ============ TYPES ============
-
- // CHECK-LABEL: "type_i1"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i1(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i4"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i4(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i8"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i8(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i16(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i32(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i64(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_ui4"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui4(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_ui8"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui8(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_ui16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui16(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_ui32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui32(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_ui64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui64(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f8E4M3FN"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E4M3FN(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f8E5M2"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E5M2(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f8E4M3FNUZ"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E4M3FNUZ(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f8E4M3B11FNUZ"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E4M3B11FNUZ(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f8E5M2FNUZ"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E5M2FNUZ(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_bf16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_bf16(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f16(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f32(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f64(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_complex_f32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_complex_f32(%arg0: tensor>, %arg1: tensor>) -> tensor> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor>
- func.return %0 : tensor>
- }
-
- // CHECK-LABEL: "type_complex_f64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_complex_f64(%arg0: tensor>, %arg1: tensor>) -> tensor> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor>
- func.return %0 : tensor>
- }
-
- // CHECK-LABEL: "type_dynamism_ranked"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_dynamism_ranked(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_per_tensor_quantization"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_per_tensor_quantization(%arg0: tensor>, %arg1: tensor>) -> tensor> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor>
- func.return %0 : tensor>
- }
-
- // CHECK-LABEL: "type_per_axis_quantization"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_per_axis_quantization(%arg0: tensor<2x!quant.uniform>) -> tensor<2x!quant.uniform> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg0) : (!vhlo.tensor_v1<2x!vhlo.quant_per_axis_v1>, !vhlo.tensor_v1<2x!vhlo.quant_per_axis_v1>) -> !vhlo.tensor_v1<2x!vhlo.quant_per_axis_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG0]]) : (!vhlo.tensor_v1<2x!vhlo.quant_per_axis_v1>, !vhlo.tensor_v1<2x!vhlo.quant_per_axis_v1>) -> !vhlo.tensor_v1<2x!vhlo.quant_per_axis_v1>
- %0 = stablehlo.add %arg0, %arg0 : tensor<2x!quant.uniform>
- func.return %0 : tensor<2x!quant.uniform>
- }
-
- // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>>
- // CHECK-LABEL: "type_token_callee"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_token_callee(%arg0: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.token_v1) -> ()
-+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> ()
- return %arg0 : !stablehlo.token
- }
-
- // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>>
- // CHECK-LABEL: "type_token_caller"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_token_caller(%arg0: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.call_v1"(%arg0) <{callee = #vhlo.string_v1<"type_token_callee">}
-+ // CHECK: "vhlo.call_v1"(%[[ARG0]]) <{callee = #vhlo.string_v1<"type_token_callee">}
- // CHECK-SAME: (!vhlo.token_v1) -> !vhlo.token_v1
- %0 = func.call @type_token_callee(%arg0) : (!stablehlo.token) -> !stablehlo.token
- return %0 : !stablehlo.token
- }
-
- // CHECK-LABEL: "type_tuple"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_tuple(%arg0: tuple>) -> tuple {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo"
-diff --ruN a/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_19_0.mlir b/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_19_0.mlir
---- stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_19_0.mlir
-+++ stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_19_0.mlir
-@@ -13,6 +13,7 @@
- // ============ ATTRIBUTES ============
-
- // CHECK-LABEL: "attr_comparison_direction_eq"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_eq(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -22,6 +23,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_ne"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_ne(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -31,6 +33,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_ge"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_ge(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -40,6 +43,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_gt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_gt(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -49,6 +53,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_le"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_le(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -58,6 +63,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_lt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_lt(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -67,6 +73,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_notype"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_notype(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo
-@@ -76,6 +83,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_float"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_float(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo,
-@@ -86,6 +94,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_totalorder"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_totalorder(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo,
-@@ -96,6 +105,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_signed"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_signed(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo,
-@@ -106,6 +116,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_unsigned"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_unsigned(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo,
-@@ -118,6 +129,7 @@
- // ConvDimensionNumbers aka #stablehlo.conv is covered below.
-
- // CHECK-LABEL: "attr_custom_call_api_version_unspecified"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_custom_call_api_version_unspecified(%arg0: tensor) -> tensor {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo",
-@@ -128,6 +140,7 @@
- }
-
- // CHECK-LABEL: "attr_custom_call_api_version_original"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_custom_call_api_version_original(%arg0: tensor) -> tensor {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo",
-@@ -138,6 +151,7 @@
- }
-
- // CHECK-LABEL: "attr_custom_call_api_version_status_returning"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_custom_call_api_version_status_returning(%arg0: tensor) -> tensor {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo",
-@@ -148,6 +162,7 @@
- }
-
- // CHECK-LABEL: "attr_custom_call_api_version_status_returning_unified"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_custom_call_api_version_status_returning_unified(%arg0: tensor) -> tensor {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo",
-@@ -166,6 +181,7 @@
- // DotDimensionNumbers aka #stablehlo.dot is covered below.
-
- // CHECK-LABEL: "attr_fft_type_fft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -176,6 +192,7 @@
- }
-
- // CHECK-LABEL: "attr_fft_type_ifft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_ifft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -186,6 +203,7 @@
- }
-
- // CHECK-LABEL: "attr_fft_type_rfft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_rfft(%arg0: tensor<16xf32>) -> tensor<9xcomplex> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -196,6 +214,7 @@
- }
-
- // CHECK-LABEL: "attr_fft_type_irfft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_irfft(%arg0: tensor<9xcomplex>) -> tensor<16xf32> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -208,6 +227,7 @@
- // GatherDimensionNumbers aka #stablehlo.gather is covered below.
-
- // CHECK-LABEL: "attr_precision_config_default"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_precision_config_default(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
- %0 = "stablehlo.dot"(%arg0, %arg1) {
- // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
-@@ -216,6 +236,7 @@
- }
-
- // CHECK-LABEL: "attr_precision_config_high"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_precision_config_high(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
- %0 = "stablehlo.dot"(%arg0, %arg1) {
- // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
-@@ -225,6 +246,7 @@
- }
-
- // CHECK-LABEL: "attr_precision_config_highest"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_precision_config_highest(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
- %0 = "stablehlo.dot"(%arg0, %arg1) {
- // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
-@@ -234,6 +256,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_algorithm_default"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_rng_algorithm_default(%arg0: tensor) -> (tensor, tensor) {
- %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
- // CHECK: rng_algorithm = #vhlo
-@@ -243,6 +266,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_algorithm_three_fry"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_rng_algorithm_three_fry(%arg0: tensor) -> (tensor, tensor) {
- %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
- // CHECK: rng_algorithm = #vhlo
-@@ -252,6 +276,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_algorithm_philox"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_rng_algorithm_philox(%arg0: tensor) -> (tensor, tensor) {
- %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
- // CHECK: rng_algorithm = #vhlo
-@@ -261,6 +286,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_distribution_uniform"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @attr_rng_distribution_uniform(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor {
- %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) {
- // CHECK: rng_distribution = #vhlo
-@@ -270,6 +296,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_distribution_normal"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @attr_rng_distribution_normal(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor {
- %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) {
- // CHECK: rng_distribution = #vhlo
-@@ -281,6 +308,7 @@
- // ScatterDimensionNumbers aka #stablehlo.scatter is covered below.
-
- // CHECK-LABEL: "attr_transpose_no_transpose"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_transpose_no_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
- %0 = "stablehlo.triangular_solve"(%arg0, %arg1) {
- left_side = true,
-@@ -293,6 +321,7 @@
- }
-
- // CHECK-LABEL: "attr_transpose_transpose"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_transpose_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
- %0 = "stablehlo.triangular_solve"(%arg0, %arg1) {
- left_side = true,
-@@ -305,6 +334,7 @@
- }
-
- // CHECK-LABEL: "attr_transpose_adjoint"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_transpose_adjoint(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
- %0 = "stablehlo.triangular_solve"(%arg0, %arg1) {
- left_side = true,
-@@ -319,10 +349,9 @@
- // TypeExtensionsAttr aka #stablehlo.type_extensions is covered below.
-
- // CHECK-LABEL: "attr_type_extensions_bounds"
--func.func @attr_type_extensions_bounds(
-- %arg0: tensor>)
-- -> tensor> {
-- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1>) -> ()
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
-+func.func @attr_type_extensions_bounds(%arg0: tensor>) -> tensor> {
-+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> ()
- func.return %arg0 : tensor>
- }
-
-@@ -330,8 +359,9 @@
- // ============ DEFAULTS ============
-
- // CHECK-LABEL: "default_all_gather"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> {
-- // CHECK: "vhlo.all_gather_v1"(%arg0) <{
-+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{
- // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64>
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
-@@ -345,8 +375,9 @@
- }
-
- // CHECK-LABEL: "default_all_reduce"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_all_reduce(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.all_reduce_v1"(%arg0)
-+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]])
- // CHECK-SAME: <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
-@@ -368,8 +399,9 @@
- }
-
- // CHECK-LABEL: "default_all_to_all"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> {
-- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{
-+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>,
-@@ -386,8 +418,9 @@
- }
-
- // CHECK-LABEL: "default_cholesky"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> {
-- // CHECK: "vhlo.cholesky_v1"(%arg0) <{
-+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{
- // CHECK-SAME: lower = #vhlo.bool_v1
- // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>
- %0 = "stablehlo.cholesky"(%arg0) : (tensor<1x16x16xf32>) -> tensor<1x16x16xf32>
-@@ -395,8 +428,9 @@
- }
-
- // CHECK-LABEL: "default_collective_permute"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> {
-- // CHECK: "vhlo.collective_permute_v1"(%arg0) <{
-+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1>
-@@ -407,8 +441,9 @@
- }
-
- // CHECK-LABEL: "default_collective_broadcast"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_collective_broadcast(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> {
-- // CHECK: "vhlo.collective_broadcast_v1"(%arg0) <{
-+ // CHECK: "vhlo.collective_broadcast_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x2xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1>
-@@ -419,8 +454,9 @@
- }
-
- // CHECK-LABEL: "default_compare"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_compare(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: compare_type = #vhlo,
- // CHECK-SAME: comparison_direction = #vhlo
- // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-@@ -431,8 +467,9 @@
- }
-
- // CHECK-LABEL: "default_composite"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_composite(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.composite_v1"(%arg0) <{
-+ // CHECK: "vhlo.composite_v1"(%[[ARG0]]) <{
- // CHECK-SAME: composite_attributes = #vhlo.dict_v1<{}>
- // CHECK-SAME: decomposition = #vhlo.string_v1<"composite_target">
- // CHECK-SAME: name = #vhlo.string_v1<"stablehlo.composite_target">
-@@ -446,8 +483,9 @@
- }
-
- // CHECK-LABEL: "default_convolution"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x6x6x16xf32> {
-- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>,
-@@ -475,8 +513,9 @@
- }
-
- // CHECK-LABEL: "default_custom_call"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_custom_call(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.custom_call_v1"(%arg0) <{
-+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{
- // CHECK-SAME: api_version = #vhlo,
- // CHECK-SAME: backend_config = #vhlo.string_v1<"">,
- // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">,
-@@ -493,8 +532,9 @@
- }
-
- // CHECK-LABEL: "default_dot_general"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> {
-- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>,
-@@ -513,8 +553,9 @@
- }
-
- // CHECK-LABEL: "default_dot"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) ->
tensor<8x8xf32> { -- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.dot"(%arg0, %arg1) : (tensor<8x16xf32>, tensor<16x8xf32>) -> tensor<8x8xf32> -@@ -522,8 +563,9 @@ - } - - // CHECK-LABEL: "default_dynamic_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>>, - // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>> -@@ -535,8 +577,9 @@ - } - - // CHECK-LABEL: "default_dynamic_conv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> { -- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -564,8 +607,9 @@ - } - - // CHECK-LABEL: "default_dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> { -- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -591,15 +635,16 @@ - // CHECK-SAME: sym_name = #vhlo.string_v1<"default_func">, - // CHECK-SAME: sym_visibility = #vhlo.string_v1<""> - // CHECK-SAME: }> ({ -- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1): -- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1): -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : () -> () - func.return %arg0 : tensor - } - - // CHECK-LABEL: "dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> { -- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -620,8 +665,9 @@ - } - - // CHECK-LABEL: "default_infeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.infeed_v1"(%arg0) <{ -+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{ - // CHECK-SAME: infeed_config = 
#vhlo.string_v1<"">, - // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[]> - // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) -@@ -630,8 +676,9 @@ - } - - // CHECK-LABEL: "default_outfeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: outfeed_config = #vhlo.string_v1<""> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.outfeed"(%arg0, %arg1) : (tensor, !stablehlo.token) -> !stablehlo.token -@@ -639,8 +686,9 @@ - } - - // CHECK-LABEL: "default_recv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.recv_v1"(%arg0) <{ -+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -652,8 +700,9 @@ - } - - // CHECK-LABEL: "default_send" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -665,8 +714,9 @@ - } - - // CHECK-LABEL: "default_reduce_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64> -@@ -688,8 +738,9 @@ - } - - // CHECK-LABEL: "default_reduce_window" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x16x30x7xf32> { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, -@@ -711,8 +762,9 @@ - } - - // CHECK-LABEL: "default_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> { -- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, - // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>, -@@ -740,8 +792,9 @@ - } - - // CHECK-LABEL: "default_select_and_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<10x23x23x64xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> { -- 
// CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>> -@@ -769,8 +822,9 @@ - } - - // CHECK-LABEL: "default_sort" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.sort_v1"(%arg0) <{ -+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<-1 : i64> - // CHECK-SAME: is_stable = #vhlo.bool_v1 - // CHECK-SAME: }> ({ -@@ -789,29 +843,33 @@ - // ============ OPS ============ - - // CHECK-LABEL: "op_abs" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_abs(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_add" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_add(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_after_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_after_all(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.after_all_v1"(%arg0) : (!vhlo.token_v1) -> !vhlo.token_v1 -+ // CHECK: "vhlo.after_all_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.after_all"(%arg0) : (!stablehlo.token) -> !stablehlo.token - func.return %0 : !stablehlo.token - } - - // CHECK-LABEL: "op_all_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.all_gather_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{ - // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64> - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -827,8 +885,9 @@ - } - - // CHECK-LABEL: "op_all_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_reduce(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.all_reduce_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: use_global_device_ids = #vhlo.bool_v1 -@@ -851,7 +910,7 @@ - - // CHECK-LABEL: "op_all_reduce_with_promotable_types" - func.func @op_all_reduce_with_promotable_types(%operand: tensor) -> tensor { -- // CHECK: "vhlo.all_reduce_v1"(%arg0) -+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0:.*]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -869,8 +928,9 @@ - } - - // CHECK-LABEL: "op_all_to_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> { -- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{ -+ 
// CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>, -@@ -888,22 +948,25 @@ - } - - // CHECK-LABEL: "op_and" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_and(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_atan2" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_atan2(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.atan2_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.atan2_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.atan2"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_batch_norm_grad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_batch_norm_grad(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16x16x16x16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { -- // CHECK: "vhlo.batch_norm_grad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ -+ // CHECK: "vhlo.batch_norm_grad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -@@ -915,8 +978,9 @@ - } - - // CHECK-LABEL: "op_batch_norm_inference" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_batch_norm_inference(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16xf32>) -> tensor<16x16x16x16xf32> { -- // CHECK: "vhlo.batch_norm_inference_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ -+ // CHECK: "vhlo.batch_norm_inference_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1> -@@ -928,8 +992,9 @@ - } - - // CHECK-LABEL: "op_batch_norm_training" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_batch_norm_training(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { -- // CHECK: "vhlo.batch_norm_training_v1"(%arg0, %arg1, 
%arg2) <{ -+ // CHECK: "vhlo.batch_norm_training_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -@@ -941,15 +1006,17 @@ - } - - // CHECK-LABEL: "op_bitcast_convert" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_bitcast_convert(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.bitcast_convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.bitcast_convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.bitcast_convert"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_broadcast_in_dim(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.broadcast_in_dim_v1"(%arg0) <{ -+ // CHECK: "vhlo.broadcast_in_dim_v1"(%[[ARG0]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1> - %0 = "stablehlo.broadcast_in_dim"(%arg0) { -@@ -959,8 +1026,9 @@ - } - - // CHECK-LABEL: "op_broadcast" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_broadcast(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.broadcast_v1"(%arg0) <{ -+ // CHECK: "vhlo.broadcast_v1"(%[[ARG0]]) <{ - // CHECK-SAME: broadcast_sizes = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1> - %0 = "stablehlo.broadcast"(%arg0) { -@@ -970,9 +1038,10 @@ - } - - // CHECK-LABEL: "op_case" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_case(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.case_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.case"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -981,22 +1050,25 @@ - } - - // CHECK-LABEL: "op_cbrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cbrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cbrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.cbrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cbrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_ceil" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_ceil(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.ceil_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.ceil_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.ceil"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_cholesky" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> { -- // CHECK: "vhlo.cholesky_v1"(%arg0) <{ -+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{ - // CHECK-SAME: lower = #vhlo.bool_v1 - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1> - %0 = "stablehlo.cholesky"(%arg0) { -@@ -1006,22 
+1078,25 @@ - } - - // CHECK-LABEL: "op_clamp" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_clamp(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.clamp_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.clamp_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.clamp"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_count_leading_zeros" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_count_leading_zeros(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.count_leading_zeros_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.count_leading_zeros_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.count_leading_zeros"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_collective_permute" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: "vhlo.collective_permute_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -1033,8 +1108,9 @@ - } - - // CHECK-LABEL: "op_compare" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_compare(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: compare_type = #vhlo, - // CHECK-SAME: comparison_direction = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -1046,15 +1122,17 @@ - } - - // CHECK-LABEL: "op_complex" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_complex(%arg0: tensor, %arg1: tensor) -> tensor> { -- // CHECK: "vhlo.complex_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.complex_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1> - %0 = "stablehlo.complex"(%arg0, %arg1) : (tensor, tensor) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "op_composite" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_composite(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.composite_v1"(%arg0) <{ -+ // CHECK: "vhlo.composite_v1"(%[[ARG0]]) <{ - // CHECK-SAME: composite_attributes = #vhlo.dict_v1<{#vhlo.string_v1<"my_int"> = #vhlo.integer_v1<1 : i64>, #vhlo.string_v1<"my_string"> = #vhlo.string_v1<"foo">}> - // CHECK-SAME: decomposition = #vhlo.string_v1<"composite_target"> - // CHECK-SAME: name = #vhlo.string_v1<"stablehlo.composite_target"> -@@ -1073,8 +1151,9 @@ - } - - // CHECK-LABEL: "op_concatenate" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_concatenate(%arg0: tensor<8xf32>, %arg1: tensor<8xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.concatenate_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.concatenate_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x!vhlo.f32_v1>, !vhlo.tensor_v1<8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.concatenate"(%arg0, %arg1) 
{ -@@ -1084,6 +1163,7 @@ - } - - // CHECK-LABEL: "op_constant" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_constant(%arg0: tensor) -> tensor { - // CHECK: "vhlo.constant_v1"() <{ - // CHECK-SAME: value = #vhlo.tensor_v1 : tensor> -@@ -1095,15 +1175,17 @@ - } - - // CHECK-LABEL: "op_convert" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_convert(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.convert"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_convolution" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x7x7x16xf32> { -- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -1137,8 +1219,9 @@ - } - - // CHECK-LABEL: "op_cosine" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cosine(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cosine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.cosine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cosine"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } -@@ -1151,8 +1234,9 @@ - } - - // CHECK-LABEL: "op_cross_replica_sum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cross_replica_sum(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cross-replica-sum_v1"(%arg0) <{ -+ // CHECK: "vhlo.cross-replica-sum_v1"(%[[ARG0]]) <{ - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cross-replica-sum"(%arg0) { -@@ -1162,8 +1246,9 @@ - } - - // CHECK-LABEL: "op_custom_call" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_custom_call(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.custom_call_v1"(%arg0) <{ -+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{ - // CHECK-SAME: api_version = #vhlo, - // CHECK-SAME: backend_config = #vhlo.string_v1<"\08\03\1A\02">, - // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">, -@@ -1194,15 +1279,17 @@ - } - - // CHECK-LABEL: "op_divide" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_divide(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.divide_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.divide_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.divide"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dot_general" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> { -- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, -@@ -1222,8 +1309,9 @@ - } - - // CHECK-LABEL: "op_dot" -+// CHECK-NEXT: (%[[ARG0:.*]]: 
{{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.dot"(%arg0, %arg1) { -@@ -1233,8 +1321,9 @@ - } - - // CHECK-LABEL: "op_dynamic_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1248,8 +1337,9 @@ - } - - // CHECK-LABEL: "op_dynamic_conv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> { -- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -1283,8 +1373,9 @@ - } - - // CHECK-LABEL: "op_dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> { -- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -1304,8 +1395,9 @@ - } - - // CHECK-LABEL: "op_dynamic_iota" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_dynamic_iota(%arg0: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_iota_v1"(%arg0) <{ -+ // CHECK: "vhlo.dynamic_iota_v1"(%[[ARG0]]) <{ - // CHECK-SAME: iota_dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_iota"(%arg0) { -@@ -1315,22 +1407,25 @@ - } - - // CHECK-LABEL: "op_dynamic_pad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_dynamic_pad(%arg0: tensor, %arg1: tensor, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>, %arg4: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_pad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.dynamic_pad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = 
"stablehlo.dynamic_pad"(%arg0, %arg1, %arg2, %arg3, %arg4) : (tensor, tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dynamic_reshape" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_reshape(%arg0: tensor<16xf32>, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_reshape_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.dynamic_reshape_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_reshape"(%arg0, %arg1) : (tensor<16xf32>, tensor<2xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dynamic_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_slice(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor<4xf32> { -- // CHECK: "vhlo.dynamic_slice_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_slice_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: slice_sizes = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<4x!vhlo.f32_v1> - %0 = "stablehlo.dynamic_slice"(%arg0, %arg1) { -@@ -1340,15 +1435,17 @@ - } - - // CHECK-LABEL: "op_dynamic_update_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_update_slice(%arg0: tensor<16xf32>, %arg1: tensor<4xf32>, %arg2: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.dynamic_update_slice_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> -+ // CHECK: "vhlo.dynamic_update_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.dynamic_update_slice"(%arg0, %arg1, %arg2) : (tensor<16xf32>, tensor<4xf32>, tensor) -> tensor<16xf32> - func.return %0 : tensor<16xf32> - } - - // CHECK-LABEL: "op_einsum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_einsum(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.einsum_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.einsum_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab,bc->ac"> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.einsum"(%arg0, %arg1) { -@@ -1358,22 +1455,25 @@ - } - - // CHECK-LABEL: "op_exponential_minus_one" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_exponential_minus_one(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.exponential_minus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.exponential_minus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.exponential_minus_one"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_exponential" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_exponential(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.exponential_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.exponential_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.exponential"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_fft" -+// 
CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { -- // CHECK: "vhlo.fft_v1"(%arg0) <{ -+ // CHECK: "vhlo.fft_v1"(%[[ARG0]]) <{ - // CHECK-SAME: fft_length = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: fft_type = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.complex_v1>) -> !vhlo.tensor_v1<16x!vhlo.complex_v1> -@@ -1385,8 +1485,9 @@ - } - - // CHECK-LABEL: "op_floor" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_floor(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.floor_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.floor_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.floor"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } -@@ -1399,16 +1500,17 @@ - // CHECK-SAME: sym_name = #vhlo.string_v1<"op_func">, - // CHECK-SAME: sym_visibility = #vhlo.string_v1<"private"> - // CHECK-SAME: }> ({ -- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1): -- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1): -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : () -> () - - func.return %arg0 : tensor - } - - // CHECK-LABEL: "op_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> { -- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -1430,8 +1532,9 @@ - } - - // CHECK-LABEL: "op_get_dimension_size" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_get_dimension_size(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.get_dimension_size_v1"(%arg0) <{ -+ // CHECK: "vhlo.get_dimension_size_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.get_dimension_size"(%arg0) { -@@ -1441,8 +1544,9 @@ - } - - // CHECK-LABEL: "op_get_tuple_element" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_get_tuple_element(%arg0: tuple, tensor>) -> tensor { -- // CHECK: "vhlo.get_tuple_element_v1"(%arg0) <{ -+ // CHECK: "vhlo.get_tuple_element_v1"(%[[ARG0]]) <{ - // CHECK-SAME: index = #vhlo.integer_v1<0 : i32> - // CHECK-SAME: }> : (!vhlo.tuple_v1, !vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.get_tuple_element"(%arg0) { -@@ -1452,11 +1556,12 @@ - } - - // CHECK-LABEL: "op_if" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_if(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.if_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.if_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }, { -- // CHECK-NEXT: "vhlo.return_v1"(%arg2) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG2]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.if"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -1467,15 +1572,17 @@ - } - - // CHECK-LABEL: "op_imag" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_imag(%arg0: tensor>) -> tensor { -- // CHECK: 
"vhlo.imag_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.imag_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.imag"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_infeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.infeed_v1"(%arg0) <{ -+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{ - // CHECK-SAME: infeed_config = #vhlo.string_v1<"foo">, - // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[#vhlo.array_v1<[]>]> - // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) -@@ -1498,36 +1605,41 @@ - } - - // CHECK-LABEL: "op_is_finite" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_is_finite(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.is_finite_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.is_finite_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.is_finite"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_log" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_log(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.log_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.log_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.log"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_log_plus_one" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_log_plus_one(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.log_plus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.log_plus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.log_plus_one"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_logistic" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_logistic(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.logistic_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.logistic_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.logistic"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_map" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_map(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.map_v1"(%arg0) <{ -+ // CHECK: "vhlo.map_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> ({ - // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1): -@@ -1545,57 +1657,65 @@ - } - - // CHECK-LABEL: "op_maximum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_maximum(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.maximum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.maximum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.maximum"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_minimum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_minimum(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.minimum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.minimum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.minimum"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_multiply" -+// 
CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_multiply(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.multiply_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.multiply_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.multiply"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_negate" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_negate(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.negate_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.negate_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.negate"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_not" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_not(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.not_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.not_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.not"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_optimization_barrier" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_optimization_barrier(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.optimization_barrier_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.optimization_barrier_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.optimization_barrier"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_or" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_or(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.or_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.or_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.or"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_outfeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: outfeed_config = #vhlo.string_v1<"foo"> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.outfeed"(%arg0, %arg1) { -@@ -1605,8 +1725,9 @@ - } - - // CHECK-LABEL: "op_pad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_pad(%arg0: tensor<8xf32>, %arg1: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.pad_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.pad_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: edge_padding_high = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: edge_padding_low = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: interior_padding = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1620,36 +1741,41 @@ - } - - // CHECK-LABEL: "op_popcnt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_popcnt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.popcnt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.popcnt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.popcnt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_power" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_power(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: 
"vhlo.power_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.power_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.power"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_real_dynamic_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}) - func.func @op_real_dynamic_slice(%arg0: tensor, %arg1: tensor<1xindex>, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.real_dynamic_slice_v1"(%arg0, %arg1, %arg2, %arg3) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.real_dynamic_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.real_dynamic_slice"(%arg0, %arg1, %arg2, %arg3) : (tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_real" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_real(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.real_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.real_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.real"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_recv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.recv_v1"(%arg0) <{ -+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<3 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -1662,8 +1788,9 @@ - } - - // CHECK-LABEL: "op_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_reduce(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.reduce_v1"(%arg0, %arg1) -+ // CHECK: "vhlo.reduce_v1"(%[[ARG0]], %[[ARG1]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -1678,8 +1805,9 @@ - } - - // CHECK-LABEL: "op_reduce_precision" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reduce_precision(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.reduce_precision_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_precision_v1"(%[[ARG0]]) <{ - // CHECK-SAME: exponent_bits = #vhlo.integer_v1<8 : i32> - // CHECK-SAME: mantissa_bits = #vhlo.integer_v1<10 : i32> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -1690,10 +1818,10 @@ - func.return %0 : tensor - } - --// CHECK_lABEL: "op_reduce_with_promotable_types" -+// CHECK-LABEL: "op_reduce_with_promotable_types" - func.func @op_reduce_with_promotable_types(%arg0: tensor<4x4xf32>, %arg1 : tensor) - -> (tensor<4xf64>) { -- // CHECK: "vhlo.reduce_v1"(%arg0, %arg1) -+ // CHECK: "vhlo.reduce_v1"(%[[ARG0:.*]], %[[ARG1:.*]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1<4x4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> 
!vhlo.tensor_v1<4x!vhlo.f64_v1> -@@ -1708,8 +1836,9 @@ - } - - // CHECK-LABEL: "op_reduce_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64> -@@ -1734,7 +1863,7 @@ - - // CHECK_lABEL: "op_reduce_scatter_with_promotable_types" - func.func @op_reduce_scatter_with_promotable_types(%data: tensor<4x16xf32>) -> tensor<4x4xf64> { -- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) -+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0:.*]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1<4x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f64_v1> -@@ -1751,8 +1880,9 @@ - - - // CHECK-LABEL: "op_reduce_window" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x9x16x7xf32> { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, -@@ -1781,7 +1911,7 @@ - func.func @op_reduce_window_with_promotable_types(%arg0: tensor<4x2xf32>, - %arg1: tensor<4x2xf32>, %init0: tensor, %init1: tensor) -> - (tensor<2x2xf64>, tensor<2x2xf32>) { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1, %arg2, %arg3) -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0:.*]], %[[ARG1:.*]], %[[ARG2:.*]], %[[ARG3:.*]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1, %[[ARG3:arg.*]]: !vhlo.tensor_v1, %[[ARG4:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]], %[[VAL2:.*]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1<4x2x!vhlo.f32_v1>, !vhlo.tensor_v1<4x2x!vhlo.f32_v1>, !vhlo.tensor_v1, !vhlo.tensor_v1) -> (!vhlo.tensor_v1<2x2x!vhlo.f64_v1>, !vhlo.tensor_v1<2x2x!vhlo.f32_v1>) -@@ -1801,8 +1931,9 @@ - } - - // CHECK-LABEL: "op_remainder" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_remainder(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.remainder_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.remainder_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.remainder"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } -@@ -1822,16 +1953,18 @@ - } - - // CHECK-LABEL: "op_reshape" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reshape(%arg0: tensor<16xf32>) -> tensor<4x4xf32> { -- // CHECK: "vhlo.reshape_v1"(%arg0) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1> -+ // CHECK: "vhlo.reshape_v1"(%[[ARG0]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1> - %0 = "stablehlo.reshape"(%arg0) : (tensor<16xf32>) -> tensor<4x4xf32> - func.return %0 : tensor<4x4xf32> - } - - // CHECK-LABEL: "op_return" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func 
-- // CHECK: "vhlo.case_v1"(%arg0) ({
-- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> ()
-+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({
-+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.case"(%arg0) ({
- "stablehlo.return"(%arg1) : (tensor) -> ()
-@@ -1840,8 +1973,9 @@
- }
-
- // CHECK-LABEL: "op_reverse"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_reverse(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.reverse_v1"(%arg0) <{
-+ // CHECK: "vhlo.reverse_v1"(%[[ARG0]]) <{
- // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
- %0 = "stablehlo.reverse"(%arg0) {
-@@ -1851,8 +1985,9 @@
- }
-
- // CHECK-LABEL: "op_rng_bit_generator"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_rng_bit_generator(%arg0: tensor) -> (tensor, tensor) {
-- // CHECK: "vhlo.rng_bit_generator_v1"(%arg0) <{
-+ // CHECK: "vhlo.rng_bit_generator_v1"(%[[ARG0]]) <{
- // CHECK-SAME: rng_algorithm = #vhlo
- // CHECK-SAME: }> : (!vhlo.tensor_v1) -> (!vhlo.tensor_v1, !vhlo.tensor_v1)
- %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
-@@ -1862,8 +1997,9 @@
- }
-
- // CHECK-LABEL: "op_rng"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_rng(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor {
-- // CHECK: "vhlo.rng_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.rng_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: rng_distribution = #vhlo
- // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<0x!vhlo.index_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) {
-@@ -1873,29 +2009,33 @@
- }
-
- // CHECK-LABEL: "op_round_nearest_afz"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_round_nearest_afz(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.round_nearest_afz_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.round_nearest_afz_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.round_nearest_afz"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_round_nearest_even"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_round_nearest_even(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.round_nearest_even_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.round_nearest_even_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.round_nearest_even"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_rsqrt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_rsqrt(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.rsqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.rsqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.rsqrt"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> {
-- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
- // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
-@@ -1928,7 +2068,7 @@
- func.func @op_scatter_with_promotable_types(%input_tensor: tensor<200x100x300xf32>,
- %scatter_indices: tensor<10x2xi32>, %updates: tensor<10x300xf32>) ->
- tensor<200x100x300xf64> {
-- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2)
-+ // CHECK: "vhlo.scatter_v1"(%[[ARG0:.*]], %[[ARG1:.*]], %[[ARG2:.*]])
- // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1):
- // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> ()
- // CHECK: }) : (!vhlo.tensor_v1<200x100x300x!vhlo.f32_v1>, !vhlo.tensor_v1<10x2x!vhlo.i32_v1>, !vhlo.tensor_v1<10x300x!vhlo.f32_v1>) -> !vhlo.tensor_v1<200x100x300x!vhlo.f64_v1>
-@@ -1951,8 +2091,9 @@
- }
-
- // CHECK-LABEL: "op_select_and_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<12x13x13x66xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> {
-- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>,
- // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>,
- // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>>
-@@ -1982,8 +2123,9 @@
- }
-
- // CHECK-LABEL: "op_select_and_scatter_with_promotable_types"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_select_and_scatter_with_promotable_types(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<12x13x13x66xf32>, %arg2: tensor) -> tensor<10x24x24x64xf64> {
-- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2)
-+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]])
- // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1):
- // CHECK: %[[VAL:.*]] = "vhlo.add_v1"(%[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- // CHECK: "vhlo.return_v1"(%[[VAL]]) : (!vhlo.tensor_v1) -> ()
-@@ -2005,15 +2147,17 @@
- }
-
- // CHECK-LABEL: "op_select"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_select(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor {
-- // CHECK: "vhlo.select_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.select_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.select"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_send"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME: channel_type = #vhlo.integer_v1<2 : i64>,
- // CHECK-SAME: is_host_transfer = #vhlo.bool_v1
-@@ -2026,8 +2170,9 @@
- }
-
- // CHECK-LABEL: "op_set_dimension_size"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_set_dimension_size(%arg0: tensor, %arg1: tensor) -> tensor<16xf32> {
-- // CHECK: "vhlo.set_dimension_size_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.set_dimension_size_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
- %0 = "stablehlo.set_dimension_size"(%arg0, %arg1) {
-@@ -2037,43 +2182,49 @@
- }
-
- // CHECK-LABEL: "op_shift_left"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_shift_left(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.shift_left_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.shift_left_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.shift_left"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_shift_right_arithmetic"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_shift_right_arithmetic(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.shift_right_arithmetic_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.shift_right_arithmetic_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.shift_right_arithmetic"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_shift_right_logical"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_shift_right_logical(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.shift_right_logical_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.shift_right_logical_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.shift_right_logical"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_sign"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sign(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.sign_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.sign_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.sign"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_sine"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sine(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.sine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.sine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.sine"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_slice"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_slice(%arg0: tensor<16xf32>) -> tensor<4xf32> {
-- // CHECK: "vhlo.slice_v1"(%arg0) <{
-+ // CHECK: "vhlo.slice_v1"(%[[ARG0]]) <{
- // CHECK-SAME: limit_indices = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: start_indices = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: strides = #vhlo.tensor_v1 : tensor<1xi64>>
-@@ -2087,8 +2238,9 @@
- }
-
- // CHECK-LABEL: "op_sort"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.sort_v1"(%arg0) <{
-+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{
- // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: is_stable = #vhlo.bool_v1
- // CHECK-SAME: }> ({
-@@ -2108,29 +2260,33 @@
- }
-
- // CHECK-LABEL: "op_sqrt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sqrt(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.sqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.sqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.sqrt"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_subtract"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_subtract(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.subtract_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.subtract_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.subtract"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_tanh"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_tanh(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.tanh_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.tanh_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.tanh"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_torch_index_select"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_torch_index_select(%arg0: tensor<5x1x5xf32>, %arg1: tensor<2xi32>) -> tensor<2x1x5xf32> {
-- // CHECK: "vhlo.torch_index_select_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.torch_index_select_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: batch_dims = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: dim = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<5x1x5x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.i32_v1>) -> !vhlo.tensor_v1<2x1x5x!vhlo.f32_v1>
-@@ -2142,8 +2298,9 @@
- }
-
- // CHECK-LABEL: "op_trace"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_trace(%arg0: tensor) {
-- // CHECK: "vhlo.trace_v1"(%arg0) <{
-+ // CHECK: "vhlo.trace_v1"(%[[ARG0]]) <{
- // CHECK-SAME: tag = #vhlo.string_v1<"foo">
- // CHECK-SAME: }> : (!vhlo.tensor_v1) -> ()
- "stablehlo.trace"(%arg0) {
-@@ -2153,8 +2310,9 @@
- }
-
- // CHECK-LABEL: "op_transpose"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_transpose(%arg0: tensor<16x8xf32>) -> tensor<8x16xf32> {
-- // CHECK: "vhlo.transpose_v1"(%arg0) <{
-+ // CHECK: "vhlo.transpose_v1"(%[[ARG0]]) <{
- // CHECK-SAME: permutation = #vhlo.tensor_v1 : tensor<2xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x16x!vhlo.f32_v1>
- %0 = "stablehlo.transpose"(%arg0) {
-@@ -2164,8 +2322,9 @@
- }
-
- // CHECK-LABEL: "op_triangular_solve"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_triangular_solve(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
-- // CHECK: "vhlo.triangular_solve_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.triangular_solve_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: left_side = #vhlo.bool_v1,
- // CHECK-SAME: lower = #vhlo.bool_v1,
- // CHECK-SAME: transpose_a = #vhlo,
-@@ -2181,15 +2340,17 @@
- }
-
- // CHECK-LABEL: "op_tuple"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_tuple(%arg0: tensor) -> tuple> {
-- // CHECK: "vhlo.tuple_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1>
-+ // CHECK: "vhlo.tuple_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1>
- %0 = "stablehlo.tuple"(%arg0) : (tensor) -> tuple>
- func.return %0 : tuple>
- }
-
- // CHECK-LABEL: "op_unary_einsum"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_unary_einsum(%arg0: tensor<8x16xf32>) -> tensor<8xf32> {
-- // CHECK: "vhlo.unary_einsum_v1"(%arg0) <{
-+ // CHECK: "vhlo.unary_einsum_v1"(%[[ARG0]]) <{
- // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab->a">
- // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x!vhlo.f32_v1>
- %0 = "stablehlo.unary_einsum"(%arg0) {
-@@ -2199,22 +2360,25 @@
- }
-
- // CHECK-LABEL: "op_uniform_dequantize"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_uniform_dequantize(%arg0: tensor>) -> tensor {
-- // CHECK: "vhlo.uniform_dequantize_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.uniform_dequantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.uniform_dequantize"(%arg0) : (tensor>) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_uniform_quantize"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_uniform_quantize(%arg0: tensor) -> tensor> {
-- // CHECK: "vhlo.uniform_quantize_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1>
-+ // CHECK: "vhlo.uniform_quantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1>
- %0 = "stablehlo.uniform_quantize"(%arg0) : (tensor) -> tensor>
- func.return %0 : tensor>
- }
-
- // CHECK-LABEL: "op_while"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_while(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.while_v1"(%arg0) ({
-+ // CHECK: "vhlo.while_v1"(%[[ARG0]]) ({
- // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1):
- // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }, {
-@@ -2232,8 +2396,9 @@
- }
-
- // CHECK-LABEL: "op_xor"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_xor(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.xor_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.xor_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.xor"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-@@ -2241,197 +2406,225 @@
- // ============ TYPES ============
-
- // CHECK-LABEL: "type_i1"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i1(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i4"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i4(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i8"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i8(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i16(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i32(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i64(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_ui4"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui4(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_ui8"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui8(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_ui16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui16(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_ui32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui32(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_ui64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui64(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f8E4M3FN"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E4M3FN(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f8E5M2"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E5M2(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f8E4M3FNUZ"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E4M3FNUZ(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f8E4M3B11FNUZ"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E4M3B11FNUZ(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f8E5M2FNUZ"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E5M2FNUZ(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_bf16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_bf16(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f16(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f32(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f64(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_complex_f32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_complex_f32(%arg0: tensor>, %arg1: tensor>) -> tensor> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor>
- func.return %0 : tensor>
- }
-
- // CHECK-LABEL: "type_complex_f64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_complex_f64(%arg0: tensor>, %arg1: tensor>) -> tensor> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor>
- func.return %0 : tensor>
- }
-
- // CHECK-LABEL: "type_dynamism_ranked"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_dynamism_ranked(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_per_tensor_quantization"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_per_tensor_quantization(%arg0: tensor>, %arg1: tensor>) -> tensor> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor>
- func.return %0 : tensor>
- }
-
- // CHECK-LABEL: "type_per_axis_quantization"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_per_axis_quantization(%arg0: tensor<2x!quant.uniform>) -> tensor<2x!quant.uniform> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg0) : (!vhlo.tensor_v1<2x!vhlo.quant_per_axis_v1>, !vhlo.tensor_v1<2x!vhlo.quant_per_axis_v1>) -> !vhlo.tensor_v1<2x!vhlo.quant_per_axis_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG0]]) : (!vhlo.tensor_v1<2x!vhlo.quant_per_axis_v1>, !vhlo.tensor_v1<2x!vhlo.quant_per_axis_v1>) -> !vhlo.tensor_v1<2x!vhlo.quant_per_axis_v1>
- %0 = stablehlo.add %arg0, %arg0 : tensor<2x!quant.uniform>
- func.return %0 : tensor<2x!quant.uniform>
- }
-
- // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>>
- // CHECK-LABEL: "type_token_callee"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_token_callee(%arg0: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.token_v1) -> ()
-+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> ()
- return %arg0 : !stablehlo.token
- }
-
- // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>>
- // CHECK-LABEL: "type_token_caller"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_token_caller(%arg0: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.call_v1"(%arg0) <{callee = #vhlo.string_v1<"type_token_callee">}
-+ // CHECK: "vhlo.call_v1"(%[[ARG0]]) <{callee = #vhlo.string_v1<"type_token_callee">}
- // CHECK-SAME: (!vhlo.token_v1) -> !vhlo.token_v1
- %0 = func.call @type_token_callee(%arg0) : (!stablehlo.token) -> !stablehlo.token
- return %0 : !stablehlo.token
- }
-
- // CHECK-LABEL: "type_tuple"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_tuple(%arg0: tuple>) -> tuple {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo"
-diff --ruN a/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_9_0.mlir b/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_9_0.mlir
---- stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_9_0.mlir
-+++ stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_9_0.mlir
-@@ -13,6 +13,7 @@
- // ============ ATTRIBUTES ============
-
- // CHECK-LABEL: "attr_comparison_direction_eq"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_eq(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -22,6 +23,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_ne"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_ne(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -31,6 +33,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_ge"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_ge(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -40,6 +43,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_gt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_gt(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -49,6 +53,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_le"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_le(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -58,6 +63,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_lt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_lt(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -67,6 +73,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_notype"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_notype(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo
-@@ -76,6 +83,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_float"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_float(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo,
-@@ -86,6 +94,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_totalorder"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_totalorder(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo,
-@@ -96,6 +105,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_signed"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_signed(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo,
-@@ -106,6 +116,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_unsigned"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_unsigned(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo,
-@@ -118,6 +129,7 @@
- // ConvDimensionNumbers aka #stablehlo.conv is covered below.
-
- // CHECK-LABEL: "attr_custom_call_api_version_unspecified"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_custom_call_api_version_unspecified(%arg0: tensor) -> tensor {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo",
-@@ -128,6 +140,7 @@
- }
-
- // CHECK-LABEL: "attr_custom_call_api_version_original"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_custom_call_api_version_original(%arg0: tensor) -> tensor {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo",
-@@ -138,6 +151,7 @@
- }
-
- // CHECK-LABEL: "attr_custom_call_api_version_status_returning"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_custom_call_api_version_status_returning(%arg0: tensor) -> tensor {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo",
-@@ -148,6 +162,7 @@
- }
-
- // CHECK-LABEL: "attr_custom_call_api_version_status_returning_unified"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_custom_call_api_version_status_returning_unified(%arg0: tensor) -> tensor {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo",
-@@ -160,6 +175,7 @@
- // DotDimensionNumbers aka #stablehlo.dot is covered below.
-
- // CHECK-LABEL: "attr_fft_type_fft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -170,6 +186,7 @@
- }
-
- // CHECK-LABEL: "attr_fft_type_ifft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_ifft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -180,6 +197,7 @@
- }
-
- // CHECK-LABEL: "attr_fft_type_rfft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_rfft(%arg0: tensor<16xf32>) -> tensor<9xcomplex> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -190,6 +208,7 @@
- }
-
- // CHECK-LABEL: "attr_fft_type_irfft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_irfft(%arg0: tensor<9xcomplex>) -> tensor<16xf32> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -202,6 +221,7 @@
- // GatherDimensionNumbers aka #stablehlo.gather is covered below.
-
- // CHECK-LABEL: "attr_precision_config_default"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_precision_config_default(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
- %0 = "stablehlo.dot"(%arg0, %arg1) {
- // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
-@@ -210,6 +230,7 @@
- }
-
- // CHECK-LABEL: "attr_precision_config_high"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_precision_config_high(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
- %0 = "stablehlo.dot"(%arg0, %arg1) {
- // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
-@@ -219,6 +240,7 @@
- }
-
- // CHECK-LABEL: "attr_precision_config_highest"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_precision_config_highest(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
- %0 = "stablehlo.dot"(%arg0, %arg1) {
- // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
-@@ -228,6 +250,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_algorithm_default"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_rng_algorithm_default(%arg0: tensor) -> (tensor, tensor) {
- %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
- // CHECK: rng_algorithm = #vhlo
-@@ -237,6 +260,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_algorithm_three_fry"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_rng_algorithm_three_fry(%arg0: tensor) -> (tensor, tensor) {
- %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
- // CHECK: rng_algorithm = #vhlo
-@@ -246,6 +270,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_algorithm_philox"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_rng_algorithm_philox(%arg0: tensor) -> (tensor, tensor) {
- %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
- // CHECK: rng_algorithm = #vhlo
-@@ -255,6 +280,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_distribution_uniform"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @attr_rng_distribution_uniform(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor {
- %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) {
- // CHECK: rng_distribution = #vhlo
-@@ -264,6 +290,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_distribution_normal"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @attr_rng_distribution_normal(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor {
- %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) {
- // CHECK: rng_distribution = #vhlo
-@@ -275,6 +302,7 @@
- // ScatterDimensionNumbers aka #stablehlo.scatter is covered below.
-
- // CHECK-LABEL: "attr_transpose_no_transpose"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_transpose_no_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
- %0 = "stablehlo.triangular_solve"(%arg0, %arg1) {
- left_side = true,
-@@ -287,6 +315,7 @@
- }
-
- // CHECK-LABEL: "attr_transpose_transpose"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_transpose_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
- %0 = "stablehlo.triangular_solve"(%arg0, %arg1) {
- left_side = true,
-@@ -299,6 +328,7 @@
- }
-
- // CHECK-LABEL: "attr_transpose_adjoint"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_transpose_adjoint(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
- %0 = "stablehlo.triangular_solve"(%arg0, %arg1) {
- left_side = true,
-@@ -313,10 +343,9 @@
- // TypeExtensionsAttr aka #stablehlo.type_extensions is covered below.
-
- // CHECK-LABEL: "attr_type_extensions_bounds"
--func.func @attr_type_extensions_bounds(
-- %arg0: tensor>)
-- -> tensor> {
-- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1>) -> ()
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
-+func.func @attr_type_extensions_bounds(%arg0: tensor>) -> tensor> {
-+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> ()
- func.return %arg0 : tensor>
- }
-
-@@ -324,8 +353,9 @@
- // ============ DEFAULTS ============
-
- // CHECK-LABEL: "default_all_gather"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> {
-- // CHECK: "vhlo.all_gather_v1"(%arg0) <{
-+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{
- // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64>
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
-@@ -339,8 +369,9 @@
- }
-
- // CHECK-LABEL: "default_all_reduce"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_all_reduce(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.all_reduce_v1"(%arg0)
-+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]])
- // CHECK-SAME: <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
-@@ -362,8 +393,9 @@
- }
-
- // CHECK-LABEL: "default_all_to_all"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> {
-- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{
-+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>,
-@@ -380,8 +412,9 @@
- }
-
- // CHECK-LABEL: "default_cholesky"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> {
-- // CHECK: "vhlo.cholesky_v1"(%arg0) <{
-+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{
- // CHECK-SAME: lower = #vhlo.bool_v1
- // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>
- %0 = "stablehlo.cholesky"(%arg0) : (tensor<1x16x16xf32>) -> tensor<1x16x16xf32>
-@@ -389,8 +422,9 @@
- }
-
- // CHECK-LABEL: "default_collective_permute"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> {
-- // CHECK: "vhlo.collective_permute_v1"(%arg0) <{
"vhlo.collective_permute_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -401,8 +435,9 @@ - } - - // CHECK-LABEL: "default_compare" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_compare(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: compare_type = #vhlo, - // CHECK-SAME: comparison_direction = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -413,8 +448,9 @@ - } - - // CHECK-LABEL: "default_convolution" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x6x6x16xf32> { -- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -442,8 +478,9 @@ - } - - // CHECK-LABEL: "default_custom_call" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_custom_call(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.custom_call_v1"(%arg0) <{ -+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{ - // CHECK-SAME: api_version = #vhlo, - // CHECK-SAME: backend_config = #vhlo.string_v1<"">, - // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">, -@@ -460,8 +497,9 @@ - } - - // CHECK-LABEL: "default_dot_general" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> { -- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, -@@ -480,8 +518,9 @@ - } - - // CHECK-LABEL: "default_dot" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.dot"(%arg0, %arg1) : (tensor<8x16xf32>, tensor<16x8xf32>) -> tensor<8x8xf32> -@@ -489,8 +528,9 @@ - } - - // CHECK-LABEL: "default_dynamic_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>>, - // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : 
tensor<0xi64>> -@@ -502,8 +542,9 @@ - } - - // CHECK-LABEL: "default_dynamic_conv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> { -- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -531,8 +572,9 @@ - } - - // CHECK-LABEL: "default_dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> { -- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -558,15 +600,16 @@ - // CHECK-SAME: sym_name = #vhlo.string_v1<"default_func">, - // CHECK-SAME: sym_visibility = #vhlo.string_v1<""> - // CHECK-SAME: }> ({ -- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1): -- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1): -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : () -> () - func.return %arg0 : tensor - } - - // CHECK-LABEL: "dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> { -- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -587,8 +630,9 @@ - } - - // CHECK-LABEL: "default_infeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.infeed_v1"(%arg0) <{ -+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{ - // CHECK-SAME: infeed_config = #vhlo.string_v1<"">, - // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[]> - // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) -@@ -597,8 +641,9 @@ - } - - // CHECK-LABEL: "default_outfeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: outfeed_config = #vhlo.string_v1<""> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.outfeed"(%arg0, %arg1) : (tensor, !stablehlo.token) -> !stablehlo.token -@@ -606,8 +651,9 @@ - } - - // CHECK-LABEL: "default_recv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.recv_v1"(%arg0) <{ -+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // 
CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -619,8 +665,9 @@ - } - - // CHECK-LABEL: "default_send" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -632,8 +679,9 @@ - } - - // CHECK-LABEL: "default_reduce_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64> -@@ -655,8 +703,9 @@ - } - - // CHECK-LABEL: "default_reduce_window" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x16x30x7xf32> { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, -@@ -678,8 +727,9 @@ - } - - // CHECK-LABEL: "default_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> { -- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, - // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>, -@@ -707,8 +757,9 @@ - } - - // CHECK-LABEL: "default_select_and_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<10x23x23x64xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> { -- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>> -@@ -736,8 +787,9 @@ - } - - // CHECK-LABEL: "default_sort" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.sort_v1"(%arg0) <{ -+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<-1 : i64> - // CHECK-SAME: is_stable = #vhlo.bool_v1 - // CHECK-SAME: }> ({ -@@ -756,29 +808,33 @@ - // ============ OPS ============ - - // CHECK-LABEL: "op_abs" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_abs(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : 
(!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_add" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_add(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_after_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_after_all(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.after_all_v1"(%arg0) : (!vhlo.token_v1) -> !vhlo.token_v1 -+ // CHECK: "vhlo.after_all_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.after_all"(%arg0) : (!stablehlo.token) -> !stablehlo.token - func.return %0 : !stablehlo.token - } - - // CHECK-LABEL: "op_all_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.all_gather_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{ - // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64> - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -794,8 +850,9 @@ - } - - // CHECK-LABEL: "op_all_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_reduce(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.all_reduce_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: use_global_device_ids = #vhlo.bool_v1 -@@ -817,8 +874,9 @@ - } - - // CHECK-LABEL: "op_all_to_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> { -- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>, -@@ -836,22 +894,25 @@ - } - - // CHECK-LABEL: "op_and" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_and(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_atan2" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_atan2(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.atan2_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.atan2_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.atan2"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_batch_norm_grad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_batch_norm_grad(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, 
-- // CHECK: "vhlo.batch_norm_grad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{
-+ // CHECK: "vhlo.batch_norm_grad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{
- // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>,
- // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>)
-@@ -863,8 +924,9 @@
- }
-
- // CHECK-LABEL: "op_batch_norm_inference"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}})
- func.func @op_batch_norm_inference(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16xf32>) -> tensor<16x16x16x16xf32> {
-- // CHECK: "vhlo.batch_norm_inference_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{
-+ // CHECK: "vhlo.batch_norm_inference_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{
- // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>,
- // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>
-@@ -876,8 +938,9 @@
- }
-
- // CHECK-LABEL: "op_batch_norm_training"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_batch_norm_training(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) {
-- // CHECK: "vhlo.batch_norm_training_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.batch_norm_training_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>,
- // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>)
-@@ -889,15 +952,17 @@
- }
-
- // CHECK-LABEL: "op_bitcast_convert"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_bitcast_convert(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.bitcast_convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.bitcast_convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.bitcast_convert"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_broadcast_in_dim"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_broadcast_in_dim(%arg0: tensor<16xf32>) -> tensor<16x16xf32> {
-- // CHECK: "vhlo.broadcast_in_dim_v1"(%arg0) <{
-+ // CHECK: "vhlo.broadcast_in_dim_v1"(%[[ARG0]]) <{
- // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1>
- %0 = "stablehlo.broadcast_in_dim"(%arg0) {
-@@ -907,8 +972,9 @@
- }
-
- // CHECK-LABEL: "op_broadcast"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_broadcast(%arg0: tensor<16xf32>) -> tensor<16x16xf32> {
-- // CHECK: "vhlo.broadcast_v1"(%arg0) <{
-+ // CHECK: "vhlo.broadcast_v1"(%[[ARG0]]) <{
- // CHECK-SAME: broadcast_sizes = #vhlo.tensor_v1 : tensor<1xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1>
- %0 = "stablehlo.broadcast"(%arg0) {
-@@ -918,9 +984,10 @@
- }
-
- // CHECK-LABEL: "op_case"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_case(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.case_v1"(%arg0) ({
-- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> ()
-+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({
-+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.case"(%arg0) ({
- "stablehlo.return"(%arg1) : (tensor) -> ()
-@@ -929,22 +996,25 @@
- }
-
- // CHECK-LABEL: "op_cbrt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_cbrt(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.cbrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.cbrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.cbrt"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_ceil"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_ceil(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.ceil_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.ceil_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.ceil"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_cholesky"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> {
-- // CHECK: "vhlo.cholesky_v1"(%arg0) <{
-+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{
- // CHECK-SAME: lower = #vhlo.bool_v1
- // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>
- %0 = "stablehlo.cholesky"(%arg0) {
-@@ -954,22 +1024,25 @@
- }
-
- // CHECK-LABEL: "op_clamp"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_clamp(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor {
-- // CHECK: "vhlo.clamp_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.clamp_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.clamp"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_count_leading_zeros"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_count_leading_zeros(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.count_leading_zeros_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.count_leading_zeros_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.count_leading_zeros"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_collective_permute"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> {
-- // CHECK: "vhlo.collective_permute_v1"(%arg0) <{
-+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1>
-@@ -981,8 +1054,9 @@
- }
-
- // CHECK-LABEL: "op_compare"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_compare(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: compare_type = #vhlo,
- // CHECK-SAME: comparison_direction = #vhlo
- // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-@@ -994,15 +1068,17 @@
- }
-
- // CHECK-LABEL: "op_complex"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_complex(%arg0: tensor, %arg1: tensor) -> tensor> {
-- // CHECK: "vhlo.complex_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1>
-+ // CHECK: "vhlo.complex_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1>
- %0 = "stablehlo.complex"(%arg0, %arg1) : (tensor, tensor) -> tensor>
- func.return %0 : tensor>
- }
-
- // CHECK-LABEL: "op_concatenate"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_concatenate(%arg0: tensor<8xf32>, %arg1: tensor<8xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.concatenate_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.concatenate_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<8x!vhlo.f32_v1>, !vhlo.tensor_v1<8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
- %0 = "stablehlo.concatenate"(%arg0, %arg1) {
-@@ -1012,6 +1088,7 @@
- }
-
- // CHECK-LABEL: "op_constant"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_constant(%arg0: tensor) -> tensor {
- // CHECK: "vhlo.constant_v1"() <{
- // CHECK-SAME: value = #vhlo.tensor_v1 : tensor>
-@@ -1023,15 +1100,17 @@
- }
-
- // CHECK-LABEL: "op_convert"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_convert(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.convert"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_convolution"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x7x7x16xf32> {
-- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>,
-@@ -1065,8 +1144,9 @@
- }
-
- // CHECK-LABEL: "op_cosine"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_cosine(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.cosine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.cosine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.cosine"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-@@ -1079,8 +1159,9 @@
- }
-
- // CHECK-LABEL: "op_cross_replica_sum"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_cross_replica_sum(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.cross-replica-sum_v1"(%arg0) <{
-+ // CHECK: "vhlo.cross-replica-sum_v1"(%[[ARG0]]) <{
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.cross-replica-sum"(%arg0) {
-@@ -1090,8 +1171,9 @@
- }
-
- // CHECK-LABEL: "op_custom_call"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_custom_call(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.custom_call_v1"(%arg0) <{
-+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{
- // CHECK-SAME: api_version = #vhlo,
- // CHECK-SAME: backend_config = #vhlo.string_v1<"\08\03\1A\02">,
- // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">,
-@@ -1122,15 +1204,17 @@
- }
-
- // CHECK-LABEL: "op_divide"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_divide(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.divide_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.divide_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.divide"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_dot_general"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> {
-- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>,
-@@ -1150,8 +1234,9 @@
- }
-
- // CHECK-LABEL: "op_dot"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
-- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1>
- %0 = "stablehlo.dot"(%arg0, %arg1) {
-@@ -1161,8 +1246,9 @@
- }
-
- // CHECK-LABEL: "op_dynamic_broadcast_in_dim"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor {
-- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>,
- // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>
-@@ -1176,8 +1262,9 @@
- }
-
- // CHECK-LABEL: "op_dynamic_conv"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> {
-- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>,
-@@ -1211,8 +1298,9 @@
- }
-
- // CHECK-LABEL: "op_dynamic_gather"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> {
-- // CHECK: 
"vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -1232,8 +1320,9 @@ - } - - // CHECK-LABEL: "op_dynamic_iota" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_dynamic_iota(%arg0: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_iota_v1"(%arg0) <{ -+ // CHECK: "vhlo.dynamic_iota_v1"(%[[ARG0]]) <{ - // CHECK-SAME: iota_dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_iota"(%arg0) { -@@ -1243,22 +1332,25 @@ - } - - // CHECK-LABEL: "op_dynamic_pad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_dynamic_pad(%arg0: tensor, %arg1: tensor, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>, %arg4: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_pad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.dynamic_pad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_pad"(%arg0, %arg1, %arg2, %arg3, %arg4) : (tensor, tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dynamic_reshape" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_reshape(%arg0: tensor<16xf32>, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_reshape_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.dynamic_reshape_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_reshape"(%arg0, %arg1) : (tensor<16xf32>, tensor<2xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dynamic_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_slice(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor<4xf32> { -- // CHECK: "vhlo.dynamic_slice_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_slice_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: slice_sizes = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<4x!vhlo.f32_v1> - %0 = "stablehlo.dynamic_slice"(%arg0, %arg1) { -@@ -1268,15 +1360,17 @@ - } - - // CHECK-LABEL: "op_dynamic_update_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_update_slice(%arg0: tensor<16xf32>, %arg1: tensor<4xf32>, %arg2: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.dynamic_update_slice_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> -+ // CHECK: "vhlo.dynamic_update_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> 
!vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.dynamic_update_slice"(%arg0, %arg1, %arg2) : (tensor<16xf32>, tensor<4xf32>, tensor) -> tensor<16xf32> - func.return %0 : tensor<16xf32> - } - - // CHECK-LABEL: "op_einsum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_einsum(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.einsum_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.einsum_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab,bc->ac"> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.einsum"(%arg0, %arg1) { -@@ -1286,22 +1380,25 @@ - } - - // CHECK-LABEL: "op_exponential_minus_one" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_exponential_minus_one(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.exponential_minus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.exponential_minus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.exponential_minus_one"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_exponential" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_exponential(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.exponential_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.exponential_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.exponential"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_fft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { -- // CHECK: "vhlo.fft_v1"(%arg0) <{ -+ // CHECK: "vhlo.fft_v1"(%[[ARG0]]) <{ - // CHECK-SAME: fft_length = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: fft_type = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.complex_v1>) -> !vhlo.tensor_v1<16x!vhlo.complex_v1> -@@ -1313,8 +1410,9 @@ - } - - // CHECK-LABEL: "op_floor" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_floor(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.floor_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.floor_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.floor"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } -@@ -1327,16 +1425,17 @@ - // CHECK-SAME: sym_name = #vhlo.string_v1<"op_func">, - // CHECK-SAME: sym_visibility = #vhlo.string_v1<"private"> - // CHECK-SAME: }> ({ -- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1): -- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1): -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : () -> () - - func.return %arg0 : tensor - } - - // CHECK-LABEL: "op_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> { -- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -1358,8 +1457,9 @@ - } - - // CHECK-LABEL: "op_get_dimension_size" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_get_dimension_size(%arg0: tensor) -> tensor { -- // CHECK: 
"vhlo.get_dimension_size_v1"(%arg0) <{ -+ // CHECK: "vhlo.get_dimension_size_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.get_dimension_size"(%arg0) { -@@ -1369,8 +1469,9 @@ - } - - // CHECK-LABEL: "op_get_tuple_element" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_get_tuple_element(%arg0: tuple, tensor>) -> tensor { -- // CHECK: "vhlo.get_tuple_element_v1"(%arg0) <{ -+ // CHECK: "vhlo.get_tuple_element_v1"(%[[ARG0]]) <{ - // CHECK-SAME: index = #vhlo.integer_v1<0 : i32> - // CHECK-SAME: }> : (!vhlo.tuple_v1, !vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.get_tuple_element"(%arg0) { -@@ -1380,11 +1481,12 @@ - } - - // CHECK-LABEL: "op_if" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_if(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.if_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.if_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }, { -- // CHECK-NEXT: "vhlo.return_v1"(%arg2) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG2]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.if"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -1395,15 +1497,17 @@ - } - - // CHECK-LABEL: "op_imag" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_imag(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.imag_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.imag_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.imag"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_infeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.infeed_v1"(%arg0) <{ -+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{ - // CHECK-SAME: infeed_config = #vhlo.string_v1<"foo">, - // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[#vhlo.array_v1<[]>]> - // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) -@@ -1426,36 +1530,41 @@ - } - - // CHECK-LABEL: "op_is_finite" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_is_finite(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.is_finite_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.is_finite_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.is_finite"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_log" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_log(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.log_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.log_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.log"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_log_plus_one" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_log_plus_one(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.log_plus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.log_plus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.log_plus_one"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_logistic" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_logistic(%arg0: tensor) -> tensor { -- // CHECK: 
"vhlo.logistic_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.logistic_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.logistic"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_map" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_map(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.map_v1"(%arg0) <{ -+ // CHECK: "vhlo.map_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> ({ - // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1): -@@ -1473,57 +1582,65 @@ - } - - // CHECK-LABEL: "op_maximum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_maximum(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.maximum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.maximum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.maximum"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_minimum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_minimum(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.minimum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.minimum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.minimum"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_multiply" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_multiply(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.multiply_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.multiply_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.multiply"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_negate" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_negate(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.negate_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.negate_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.negate"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_not" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_not(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.not_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.not_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.not"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_optimization_barrier" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_optimization_barrier(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.optimization_barrier_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.optimization_barrier_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.optimization_barrier"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_or" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_or(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.or_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.or_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = 
"stablehlo.or"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_outfeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: outfeed_config = #vhlo.string_v1<"foo"> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.outfeed"(%arg0, %arg1) { -@@ -1533,8 +1650,9 @@ - } - - // CHECK-LABEL: "op_pad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_pad(%arg0: tensor<8xf32>, %arg1: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.pad_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.pad_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: edge_padding_high = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: edge_padding_low = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: interior_padding = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1548,36 +1666,41 @@ - } - - // CHECK-LABEL: "op_popcnt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_popcnt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.popcnt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.popcnt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.popcnt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_power" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_power(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.power_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.power_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.power"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_real_dynamic_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}) - func.func @op_real_dynamic_slice(%arg0: tensor, %arg1: tensor<1xindex>, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.real_dynamic_slice_v1"(%arg0, %arg1, %arg2, %arg3) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.real_dynamic_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.real_dynamic_slice"(%arg0, %arg1, %arg2, %arg3) : (tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_real" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_real(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.real_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.real_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.real"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_recv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.recv_v1"(%arg0) <{ -+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<3 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ 
-1590,8 +1713,9 @@ - } - - // CHECK-LABEL: "op_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_reduce(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.reduce_v1"(%arg0, %arg1) -+ // CHECK: "vhlo.reduce_v1"(%[[ARG0]], %[[ARG1]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -1606,8 +1730,9 @@ - } - - // CHECK-LABEL: "op_reduce_precision" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reduce_precision(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.reduce_precision_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_precision_v1"(%[[ARG0]]) <{ - // CHECK-SAME: exponent_bits = #vhlo.integer_v1<8 : i32> - // CHECK-SAME: mantissa_bits = #vhlo.integer_v1<10 : i32> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -1619,8 +1744,9 @@ - } - - // CHECK-LABEL: "op_reduce_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64> -@@ -1644,8 +1770,9 @@ - } - - // CHECK-LABEL: "op_reduce_window" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x9x16x7xf32> { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, -@@ -1671,8 +1798,9 @@ - } - - // CHECK-LABEL: "op_remainder" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_remainder(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.remainder_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.remainder_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.remainder"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } -@@ -1692,16 +1820,18 @@ - } - - // CHECK-LABEL: "op_reshape" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reshape(%arg0: tensor<16xf32>) -> tensor<4x4xf32> { -- // CHECK: "vhlo.reshape_v1"(%arg0) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1> -+ // CHECK: "vhlo.reshape_v1"(%[[ARG0]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1> - %0 = "stablehlo.reshape"(%arg0) : (tensor<16xf32>) -> tensor<4x4xf32> - func.return %0 : tensor<4x4xf32> - } - - // CHECK-LABEL: "op_return" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_return(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.case_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.case"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> 
() -@@ -1710,8 +1840,9 @@ - } - - // CHECK-LABEL: "op_reverse" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reverse(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reverse_v1"(%arg0) <{ -+ // CHECK: "vhlo.reverse_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.reverse"(%arg0) { -@@ -1721,8 +1852,9 @@ - } - - // CHECK-LABEL: "op_rng_bit_generator" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_rng_bit_generator(%arg0: tensor) -> (tensor, tensor) { -- // CHECK: "vhlo.rng_bit_generator_v1"(%arg0) <{ -+ // CHECK: "vhlo.rng_bit_generator_v1"(%[[ARG0]]) <{ - // CHECK-SAME: rng_algorithm = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> (!vhlo.tensor_v1, !vhlo.tensor_v1) - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { -@@ -1732,8 +1864,9 @@ - } - - // CHECK-LABEL: "op_rng" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_rng(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { -- // CHECK: "vhlo.rng_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.rng_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: rng_distribution = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<0x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) { -@@ -1743,29 +1876,33 @@ - } - - // CHECK-LABEL: "op_round_nearest_afz" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_round_nearest_afz(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.round_nearest_afz_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.round_nearest_afz_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.round_nearest_afz"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_round_nearest_even" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_round_nearest_even(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.round_nearest_even_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.round_nearest_even_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.round_nearest_even"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_rsqrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_rsqrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.rsqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.rsqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.rsqrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> { -- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, - // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>, -@@ -1795,8 +1932,9 @@ - } - - // CHECK-LABEL: "op_select_and_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<12x13x13x66xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> { -- // CHECK: 
"vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>> -@@ -1826,15 +1964,17 @@ - } - - // CHECK-LABEL: "op_select" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_select(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.select_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.select_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.select"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_send" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -1847,8 +1987,9 @@ - } - - // CHECK-LABEL: "op_set_dimension_size" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_set_dimension_size(%arg0: tensor, %arg1: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.set_dimension_size_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.set_dimension_size_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.set_dimension_size"(%arg0, %arg1) { -@@ -1858,43 +1999,49 @@ - } - - // CHECK-LABEL: "op_shift_left" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_left(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.shift_left_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_left_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_left"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_shift_right_arithmetic" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_right_arithmetic(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.shift_right_arithmetic_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_right_arithmetic_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_right_arithmetic"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_shift_right_logical" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_right_logical(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.shift_right_logical_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_right_logical_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_right_logical"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_sign" -+// CHECK-NEXT: 
(%[[ARG0:.*]]: {{.*}}) - func.func @op_sign(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.sign_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.sign_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.sign"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_sine" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sine(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.sine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.sine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.sine"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_slice(%arg0: tensor<16xf32>) -> tensor<4xf32> { -- // CHECK: "vhlo.slice_v1"(%arg0) <{ -+ // CHECK: "vhlo.slice_v1"(%[[ARG0]]) <{ - // CHECK-SAME: limit_indices = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: start_indices = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: strides = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1908,8 +2055,9 @@ - } - - // CHECK-LABEL: "op_sort" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.sort_v1"(%arg0) <{ -+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: is_stable = #vhlo.bool_v1 - // CHECK-SAME: }> ({ -@@ -1929,29 +2077,33 @@ - } - - // CHECK-LABEL: "op_sqrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sqrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.sqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.sqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.sqrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_subtract" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_subtract(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.subtract_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.subtract_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.subtract"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_tanh" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_tanh(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.tanh_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.tanh_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.tanh"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_torch_index_select" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_torch_index_select(%arg0: tensor<5x1x5xf32>, %arg1: tensor<2xi32>) -> tensor<2x1x5xf32> { -- // CHECK: "vhlo.torch_index_select_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.torch_index_select_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_dims = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: dim = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<5x1x5x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.i32_v1>) -> !vhlo.tensor_v1<2x1x5x!vhlo.f32_v1> -@@ -1963,8 +2115,9 @@ - } - - // CHECK-LABEL: "op_trace" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_trace(%arg0: tensor) { -- // CHECK: "vhlo.trace_v1"(%arg0) <{ -+ // CHECK: "vhlo.trace_v1"(%[[ARG0]]) <{ - // CHECK-SAME: tag = #vhlo.string_v1<"foo"> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> () - "stablehlo.trace"(%arg0) { 
-@@ -1974,8 +2127,9 @@ - } - - // CHECK-LABEL: "op_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_transpose(%arg0: tensor<16x8xf32>) -> tensor<8x16xf32> { -- // CHECK: "vhlo.transpose_v1"(%arg0) <{ -+ // CHECK: "vhlo.transpose_v1"(%[[ARG0]]) <{ - // CHECK-SAME: permutation = #vhlo.tensor_v1 : tensor<2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x16x!vhlo.f32_v1> - %0 = "stablehlo.transpose"(%arg0) { -@@ -1985,8 +2139,9 @@ - } - - // CHECK-LABEL: "op_triangular_solve" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_triangular_solve(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.triangular_solve_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.triangular_solve_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: left_side = #vhlo.bool_v1, - // CHECK-SAME: lower = #vhlo.bool_v1, - // CHECK-SAME: transpose_a = #vhlo, -@@ -2002,15 +2157,17 @@ - } - - // CHECK-LABEL: "op_tuple" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_tuple(%arg0: tensor) -> tuple> { -- // CHECK: "vhlo.tuple_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1> -+ // CHECK: "vhlo.tuple_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1> - %0 = "stablehlo.tuple"(%arg0) : (tensor) -> tuple> - func.return %0 : tuple> - } - - // CHECK-LABEL: "op_unary_einsum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_unary_einsum(%arg0: tensor<8x16xf32>) -> tensor<8xf32> { -- // CHECK: "vhlo.unary_einsum_v1"(%arg0) <{ -+ // CHECK: "vhlo.unary_einsum_v1"(%[[ARG0]]) <{ - // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab->a"> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x!vhlo.f32_v1> - %0 = "stablehlo.unary_einsum"(%arg0) { -@@ -2020,22 +2177,25 @@ - } - - // CHECK-LABEL: "op_uniform_dequantize" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_uniform_dequantize(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.uniform_dequantize_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.uniform_dequantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.uniform_dequantize"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_uniform_quantize" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_uniform_quantize(%arg0: tensor) -> tensor> { -- // CHECK: "vhlo.uniform_quantize_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.uniform_quantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1> - %0 = "stablehlo.uniform_quantize"(%arg0) : (tensor) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "op_while" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_while(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.while_v1"(%arg0) ({ -+ // CHECK: "vhlo.while_v1"(%[[ARG0]]) ({ - // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1): - // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }, { -@@ -2053,8 +2213,9 @@ - } - - // CHECK-LABEL: "op_xor" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_xor(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.xor_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.xor_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.xor"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } -@@ -2062,169 +2223,193 @@ - // ============ TYPES ============ - - // 
CHECK-LABEL: "type_i1" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i1(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i4" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i4(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i8" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i8(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i32(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i64(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui4" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui4(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui8" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui8(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = 
"stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui32(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui64(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E4M3FN" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E4M3FN(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E5M2" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E5M2(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_bf16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_bf16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f32(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ 
// CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f64(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_complex_f32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_complex_f32(%arg0: tensor>, %arg1: tensor>) -> tensor> { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "type_complex_f64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_complex_f64(%arg0: tensor>, %arg1: tensor>) -> tensor> { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "type_dynamism_ranked" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_dynamism_ranked(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_quantization" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_quantization(%arg0: tensor>, %arg1: tensor>) -> tensor> { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor> - func.return %0 : tensor> - } - - // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>> - // CHECK-LABEL: "type_token_callee" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_token_callee(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.token_v1) -> () -+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> () - return %arg0 : !stablehlo.token - } - - // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>> - // CHECK-LABEL: "type_token_caller" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_token_caller(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.call_v1"(%arg0) <{callee = #vhlo.string_v1<"type_token_callee">} -+ // CHECK: "vhlo.call_v1"(%[[ARG0]]) <{callee = #vhlo.string_v1<"type_token_callee">} - // CHECK-SAME: (!vhlo.token_v1) -> !vhlo.token_v1 - %0 = func.call @type_token_callee(%arg0) : (!stablehlo.token) -> !stablehlo.token - return %0 : !stablehlo.token - } - - // CHECK-LABEL: "type_tuple" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_tuple(%arg0: tuple>) -> tuple { - %0 = 
"stablehlo.custom_call"(%arg0) { - call_target_name = "foo" -diff --ruN a/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.mlir b/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.mlir ---- stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.mlir -+++ stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.mlir -@@ -13,6 +13,7 @@ - // ============ ATTRIBUTES ============ - - // CHECK-LABEL: "attr_comparison_direction_eq" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_eq(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -22,6 +23,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_ne" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_ne(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -31,6 +33,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_ge" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_ge(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -40,6 +43,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_gt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_gt(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -49,6 +53,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_le" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_le(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -58,6 +63,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_lt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_lt(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -67,6 +73,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_notype" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_notype(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo -@@ -76,6 +83,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_float" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_float(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -86,6 +94,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_totalorder" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_totalorder(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -96,6 +105,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_signed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_signed(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -106,6 +116,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_unsigned" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func 
@attr_comparison_type_unsigned(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -118,6 +129,7 @@ - // ConvDimensionNumbers aka #stablehlo.conv is covered below. - - // CHECK-LABEL: "attr_custom_call_api_version_unspecified" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_unspecified(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -128,6 +140,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_original" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_original(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -138,6 +151,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_status_returning" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_status_returning(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -148,6 +162,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_status_returning_unified" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_status_returning_unified(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -166,6 +181,7 @@ - // DotDimensionNumbers aka #stablehlo.dot is covered below. - - // CHECK-LABEL: "attr_fft_type_fft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -176,6 +192,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_ifft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_ifft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -186,6 +203,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_rfft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_rfft(%arg0: tensor<16xf32>) -> tensor<9xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -196,6 +214,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_irfft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_irfft(%arg0: tensor<9xcomplex>) -> tensor<16xf32> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -208,6 +227,7 @@ - // GatherDimensionNumbers aka #stablehlo.gather is covered below. 
- - // CHECK-LABEL: "attr_precision_config_default" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_default(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -216,6 +236,7 @@ - } - - // CHECK-LABEL: "attr_precision_config_high" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_high(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -225,6 +246,7 @@ - } - - // CHECK-LABEL: "attr_precision_config_highest" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_highest(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -234,6 +256,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_default" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_default(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -243,6 +266,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_three_fry" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_three_fry(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -252,6 +276,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_philox" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_philox(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -261,6 +286,7 @@ - } - - // CHECK-LABEL: "attr_rng_distribution_uniform" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @attr_rng_distribution_uniform(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { - %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) { - // CHECK: rng_distribution = #vhlo -@@ -270,6 +296,7 @@ - } - - // CHECK-LABEL: "attr_rng_distribution_normal" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @attr_rng_distribution_normal(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { - %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) { - // CHECK: rng_distribution = #vhlo -@@ -281,6 +308,7 @@ - // ScatterDimensionNumbers aka #stablehlo.scatter is covered below. 
- - // CHECK-LABEL: "attr_transpose_no_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_no_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -293,6 +321,7 @@ - } - - // CHECK-LABEL: "attr_transpose_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -305,6 +334,7 @@ - } - - // CHECK-LABEL: "attr_transpose_adjoint" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_adjoint(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -319,10 +349,9 @@ - // TypeExtensionsAttr aka #stablehlo.type_extensions is covered below. - - // CHECK-LABEL: "attr_type_extensions_bounds" --func.func @attr_type_extensions_bounds( -- %arg0: tensor>) -- -> tensor> { -- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1>) -> () -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) -+func.func @attr_type_extensions_bounds(%arg0: tensor>) -> tensor> { -+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> () - func.return %arg0 : tensor> - } - -@@ -330,8 +359,9 @@ - // ============ DEFAULTS ============ - - // CHECK-LABEL: "default_all_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.all_gather_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{ - // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64> - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -345,8 +375,9 @@ - } - - // CHECK-LABEL: "default_all_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_reduce(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.all_reduce_v1"(%arg0) -+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]]) - // CHECK-SAME: <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -368,8 +399,9 @@ - } - - // CHECK-LABEL: "default_all_to_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> { -- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>, -@@ -386,8 +418,9 @@ - } - - // CHECK-LABEL: "default_cholesky" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> { -- // CHECK: "vhlo.cholesky_v1"(%arg0) <{ -+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{ - // CHECK-SAME: lower = #vhlo.bool_v1 - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1> - %0 = "stablehlo.cholesky"(%arg0) : (tensor<1x16x16xf32>) -> tensor<1x16x16xf32> -@@ -395,8 +428,9 @@ - } - - // CHECK-LABEL: "default_collective_permute" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: 
"vhlo.collective_permute_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -407,8 +441,9 @@ - } - - // CHECK-LABEL: "default_collective_broadcast" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_collective_broadcast(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: "vhlo.collective_broadcast_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_broadcast_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -419,8 +454,9 @@ - } - - // CHECK-LABEL: "default_compare" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_compare(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: compare_type = #vhlo, - // CHECK-SAME: comparison_direction = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -431,8 +467,9 @@ - } - - // CHECK-LABEL: "default_composite" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_composite(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.composite_v1"(%arg0) <{ -+ // CHECK: "vhlo.composite_v1"(%[[ARG0]]) <{ - // CHECK-SAME: composite_attributes = #vhlo.dict_v1<{}> - // CHECK-SAME: decomposition = #vhlo.string_v1<"composite_target"> - // CHECK-SAME: name = #vhlo.string_v1<"stablehlo.composite_target"> -@@ -446,8 +483,9 @@ - } - - // CHECK-LABEL: "default_convolution" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x6x6x16xf32> { -- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -475,8 +513,9 @@ - } - - // CHECK-LABEL: "default_custom_call" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_custom_call(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.custom_call_v1"(%arg0) <{ -+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{ - // CHECK-SAME: api_version = #vhlo, - // CHECK-SAME: backend_config = #vhlo.string_v1<"">, - // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">, -@@ -493,8 +532,9 @@ - } - - // CHECK-LABEL: "default_dot_general" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> { -- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, -@@ -513,8 +553,9 @@ - } - - // CHECK-LABEL: "default_dot" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> 
tensor<8x8xf32> { -- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.dot"(%arg0, %arg1) : (tensor<8x16xf32>, tensor<16x8xf32>) -> tensor<8x8xf32> -@@ -522,8 +563,9 @@ - } - - // CHECK-LABEL: "default_dynamic_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>>, - // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>> -@@ -535,8 +577,9 @@ - } - - // CHECK-LABEL: "default_dynamic_conv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> { -- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -564,8 +607,9 @@ - } - - // CHECK-LABEL: "default_dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> { -- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -591,15 +635,16 @@ - // CHECK-SAME: sym_name = #vhlo.string_v1<"default_func">, - // CHECK-SAME: sym_visibility = #vhlo.string_v1<""> - // CHECK-SAME: }> ({ -- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1): -- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1): -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : () -> () - func.return %arg0 : tensor - } - - // CHECK-LABEL: "dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> { -- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -620,8 +665,9 @@ - } - - // CHECK-LABEL: "default_infeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.infeed_v1"(%arg0) <{ -+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{ - // CHECK-SAME: infeed_config = 
#vhlo.string_v1<"">, - // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[]> - // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) -@@ -630,8 +676,9 @@ - } - - // CHECK-LABEL: "default_outfeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: outfeed_config = #vhlo.string_v1<""> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.outfeed"(%arg0, %arg1) : (tensor, !stablehlo.token) -> !stablehlo.token -@@ -639,8 +686,9 @@ - } - - // CHECK-LABEL: "default_recv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.recv_v1"(%arg0) <{ -+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -652,8 +700,9 @@ - } - - // CHECK-LABEL: "default_send" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -665,8 +714,9 @@ - } - - // CHECK-LABEL: "default_reduce_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64> -@@ -688,8 +738,9 @@ - } - - // CHECK-LABEL: "default_reduce_window" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x16x30x7xf32> { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, -@@ -711,8 +762,9 @@ - } - - // CHECK-LABEL: "default_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> { -- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, - // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>, -@@ -740,8 +792,9 @@ - } - - // CHECK-LABEL: "default_select_and_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<10x23x23x64xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> { -- 
// CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>> -@@ -769,8 +822,9 @@ - } - - // CHECK-LABEL: "default_sort" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.sort_v1"(%arg0) <{ -+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<-1 : i64> - // CHECK-SAME: is_stable = #vhlo.bool_v1 - // CHECK-SAME: }> ({ -@@ -789,29 +843,33 @@ - // ============ OPS ============ - - // CHECK-LABEL: "op_abs" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_abs(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_add" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_add(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_after_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_after_all(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.after_all_v1"(%arg0) : (!vhlo.token_v1) -> !vhlo.token_v1 -+ // CHECK: "vhlo.after_all_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.after_all"(%arg0) : (!stablehlo.token) -> !stablehlo.token - func.return %0 : !stablehlo.token - } - - // CHECK-LABEL: "op_all_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.all_gather_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{ - // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64> - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -827,8 +885,9 @@ - } - - // CHECK-LABEL: "op_all_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_reduce(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.all_reduce_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: use_global_device_ids = #vhlo.bool_v1 -@@ -851,7 +910,7 @@ - - // CHECK-LABEL: "op_all_reduce_with_promotable_types" - func.func @op_all_reduce_with_promotable_types(%operand: tensor) -> tensor { -- // CHECK: "vhlo.all_reduce_v1"(%arg0) -+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0:.*]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -869,8 +928,9 @@ - } - - // CHECK-LABEL: "op_all_to_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> { -- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{ -+ 
// CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>, -@@ -888,22 +948,25 @@ - } - - // CHECK-LABEL: "op_and" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_and(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_atan2" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_atan2(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.atan2_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.atan2_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.atan2"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_batch_norm_grad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_batch_norm_grad(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16x16x16x16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { -- // CHECK: "vhlo.batch_norm_grad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ -+ // CHECK: "vhlo.batch_norm_grad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -@@ -915,8 +978,9 @@ - } - - // CHECK-LABEL: "op_batch_norm_inference" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_batch_norm_inference(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16xf32>) -> tensor<16x16x16x16xf32> { -- // CHECK: "vhlo.batch_norm_inference_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ -+ // CHECK: "vhlo.batch_norm_inference_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1> -@@ -928,8 +992,9 @@ - } - - // CHECK-LABEL: "op_batch_norm_training" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_batch_norm_training(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { -- // CHECK: "vhlo.batch_norm_training_v1"(%arg0, %arg1, 
%arg2) <{ -+ // CHECK: "vhlo.batch_norm_training_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -@@ -941,15 +1006,17 @@ - } - - // CHECK-LABEL: "op_bitcast_convert" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_bitcast_convert(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.bitcast_convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.bitcast_convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.bitcast_convert"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_broadcast_in_dim(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.broadcast_in_dim_v1"(%arg0) <{ -+ // CHECK: "vhlo.broadcast_in_dim_v1"(%[[ARG0]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1> - %0 = "stablehlo.broadcast_in_dim"(%arg0) { -@@ -959,8 +1026,9 @@ - } - - // CHECK-LABEL: "op_broadcast" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_broadcast(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.broadcast_v1"(%arg0) <{ -+ // CHECK: "vhlo.broadcast_v1"(%[[ARG0]]) <{ - // CHECK-SAME: broadcast_sizes = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1> - %0 = "stablehlo.broadcast"(%arg0) { -@@ -970,9 +1038,10 @@ - } - - // CHECK-LABEL: "op_case" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_case(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.case_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.case"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -981,22 +1050,25 @@ - } - - // CHECK-LABEL: "op_cbrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cbrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cbrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.cbrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cbrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_ceil" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_ceil(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.ceil_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.ceil_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.ceil"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_cholesky" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> { -- // CHECK: "vhlo.cholesky_v1"(%arg0) <{ -+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{ - // CHECK-SAME: lower = #vhlo.bool_v1 - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1> - %0 = "stablehlo.cholesky"(%arg0) { -@@ -1006,22 
+1078,25 @@ - } - - // CHECK-LABEL: "op_clamp" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_clamp(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.clamp_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.clamp_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.clamp"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_count_leading_zeros" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_count_leading_zeros(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.count_leading_zeros_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.count_leading_zeros_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.count_leading_zeros"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_collective_permute" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: "vhlo.collective_permute_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -1033,8 +1108,9 @@ - } - - // CHECK-LABEL: "op_compare" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_compare(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: compare_type = #vhlo, - // CHECK-SAME: comparison_direction = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -1046,15 +1122,17 @@ - } - - // CHECK-LABEL: "op_complex" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_complex(%arg0: tensor, %arg1: tensor) -> tensor> { -- // CHECK: "vhlo.complex_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.complex_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1> - %0 = "stablehlo.complex"(%arg0, %arg1) : (tensor, tensor) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "op_composite" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_composite(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.composite_v1"(%arg0) <{ -+ // CHECK: "vhlo.composite_v1"(%[[ARG0]]) <{ - // CHECK-SAME: composite_attributes = #vhlo.dict_v1<{#vhlo.string_v1<"my_int"> = #vhlo.integer_v1<1 : i64>, #vhlo.string_v1<"my_string"> = #vhlo.string_v1<"foo">}> - // CHECK-SAME: decomposition = #vhlo.string_v1<"composite_target"> - // CHECK-SAME: name = #vhlo.string_v1<"stablehlo.composite_target"> -@@ -1073,8 +1151,9 @@ - } - - // CHECK-LABEL: "op_concatenate" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_concatenate(%arg0: tensor<8xf32>, %arg1: tensor<8xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.concatenate_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.concatenate_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x!vhlo.f32_v1>, !vhlo.tensor_v1<8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.concatenate"(%arg0, %arg1) 
{ -@@ -1084,6 +1163,7 @@ - } - - // CHECK-LABEL: "op_constant" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_constant(%arg0: tensor) -> tensor { - // CHECK: "vhlo.constant_v1"() <{ - // CHECK-SAME: value = #vhlo.tensor_v1 : tensor> -@@ -1095,15 +1175,17 @@ - } - - // CHECK-LABEL: "op_convert" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_convert(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.convert"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_convolution" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x7x7x16xf32> { -- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -1137,8 +1219,9 @@ - } - - // CHECK-LABEL: "op_cosine" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cosine(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cosine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.cosine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cosine"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } -@@ -1151,8 +1234,9 @@ - } - - // CHECK-LABEL: "op_cross_replica_sum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cross_replica_sum(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cross-replica-sum_v1"(%arg0) <{ -+ // CHECK: "vhlo.cross-replica-sum_v1"(%[[ARG0]]) <{ - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cross-replica-sum"(%arg0) { -@@ -1162,8 +1246,9 @@ - } - - // CHECK-LABEL: "op_custom_call" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_custom_call(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.custom_call_v1"(%arg0) <{ -+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{ - // CHECK-SAME: api_version = #vhlo, - // CHECK-SAME: backend_config = #vhlo.string_v1<"\08\03\1A\02">, - // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">, -@@ -1194,15 +1279,17 @@ - } - - // CHECK-LABEL: "op_divide" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_divide(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.divide_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.divide_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.divide"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dot_general" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> { -- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, -@@ -1222,8 +1309,9 @@ - } - - // CHECK-LABEL: "op_dot" -+// CHECK-NEXT: (%[[ARG0:.*]]: 
{{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.dot"(%arg0, %arg1) { -@@ -1233,8 +1321,9 @@ - } - - // CHECK-LABEL: "op_dynamic_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1248,8 +1337,9 @@ - } - - // CHECK-LABEL: "op_dynamic_conv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> { -- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -1283,8 +1373,9 @@ - } - - // CHECK-LABEL: "op_dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> { -- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -1304,8 +1395,9 @@ - } - - // CHECK-LABEL: "op_dynamic_iota" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_dynamic_iota(%arg0: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_iota_v1"(%arg0) <{ -+ // CHECK: "vhlo.dynamic_iota_v1"(%[[ARG0]]) <{ - // CHECK-SAME: iota_dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_iota"(%arg0) { -@@ -1315,22 +1407,25 @@ - } - - // CHECK-LABEL: "op_dynamic_pad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_dynamic_pad(%arg0: tensor, %arg1: tensor, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>, %arg4: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_pad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.dynamic_pad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = 
"stablehlo.dynamic_pad"(%arg0, %arg1, %arg2, %arg3, %arg4) : (tensor, tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dynamic_reshape" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_reshape(%arg0: tensor<16xf32>, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_reshape_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.dynamic_reshape_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_reshape"(%arg0, %arg1) : (tensor<16xf32>, tensor<2xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dynamic_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_slice(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor<4xf32> { -- // CHECK: "vhlo.dynamic_slice_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_slice_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: slice_sizes = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<4x!vhlo.f32_v1> - %0 = "stablehlo.dynamic_slice"(%arg0, %arg1) { -@@ -1340,15 +1435,17 @@ - } - - // CHECK-LABEL: "op_dynamic_update_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_update_slice(%arg0: tensor<16xf32>, %arg1: tensor<4xf32>, %arg2: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.dynamic_update_slice_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> -+ // CHECK: "vhlo.dynamic_update_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.dynamic_update_slice"(%arg0, %arg1, %arg2) : (tensor<16xf32>, tensor<4xf32>, tensor) -> tensor<16xf32> - func.return %0 : tensor<16xf32> - } - - // CHECK-LABEL: "op_einsum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_einsum(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.einsum_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.einsum_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab,bc->ac"> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.einsum"(%arg0, %arg1) { -@@ -1358,22 +1455,25 @@ - } - - // CHECK-LABEL: "op_exponential_minus_one" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_exponential_minus_one(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.exponential_minus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.exponential_minus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.exponential_minus_one"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_exponential" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_exponential(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.exponential_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.exponential_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.exponential"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_fft" -+// 
CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { -- // CHECK: "vhlo.fft_v1"(%arg0) <{ -+ // CHECK: "vhlo.fft_v1"(%[[ARG0]]) <{ - // CHECK-SAME: fft_length = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: fft_type = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.complex_v1>) -> !vhlo.tensor_v1<16x!vhlo.complex_v1> -@@ -1385,8 +1485,9 @@ - } - - // CHECK-LABEL: "op_floor" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_floor(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.floor_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.floor_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.floor"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } -@@ -1399,16 +1500,17 @@ - // CHECK-SAME: sym_name = #vhlo.string_v1<"op_func">, - // CHECK-SAME: sym_visibility = #vhlo.string_v1<"private"> - // CHECK-SAME: }> ({ -- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1): -- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1): -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : () -> () - - func.return %arg0 : tensor - } - - // CHECK-LABEL: "op_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> { -- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -1430,8 +1532,9 @@ - } - - // CHECK-LABEL: "op_get_dimension_size" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_get_dimension_size(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.get_dimension_size_v1"(%arg0) <{ -+ // CHECK: "vhlo.get_dimension_size_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.get_dimension_size"(%arg0) { -@@ -1441,8 +1544,9 @@ - } - - // CHECK-LABEL: "op_get_tuple_element" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_get_tuple_element(%arg0: tuple, tensor>) -> tensor { -- // CHECK: "vhlo.get_tuple_element_v1"(%arg0) <{ -+ // CHECK: "vhlo.get_tuple_element_v1"(%[[ARG0]]) <{ - // CHECK-SAME: index = #vhlo.integer_v1<0 : i32> - // CHECK-SAME: }> : (!vhlo.tuple_v1, !vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.get_tuple_element"(%arg0) { -@@ -1452,11 +1556,12 @@ - } - - // CHECK-LABEL: "op_if" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_if(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.if_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.if_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }, { -- // CHECK-NEXT: "vhlo.return_v1"(%arg2) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG2]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.if"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -1467,15 +1572,17 @@ - } - - // CHECK-LABEL: "op_imag" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_imag(%arg0: tensor>) -> tensor { -- // CHECK: 
"vhlo.imag_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.imag_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.imag"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_infeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.infeed_v1"(%arg0) <{ -+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{ - // CHECK-SAME: infeed_config = #vhlo.string_v1<"foo">, - // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[#vhlo.array_v1<[]>]> - // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) -@@ -1498,36 +1605,41 @@ - } - - // CHECK-LABEL: "op_is_finite" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_is_finite(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.is_finite_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.is_finite_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.is_finite"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_log" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_log(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.log_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.log_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.log"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_log_plus_one" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_log_plus_one(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.log_plus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.log_plus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.log_plus_one"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_logistic" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_logistic(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.logistic_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.logistic_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.logistic"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_map" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_map(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.map_v1"(%arg0) <{ -+ // CHECK: "vhlo.map_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> ({ - // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1): -@@ -1545,57 +1657,65 @@ - } - - // CHECK-LABEL: "op_maximum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_maximum(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.maximum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.maximum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.maximum"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_minimum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_minimum(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.minimum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.minimum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.minimum"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_multiply" -+// 
CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_multiply(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.multiply_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.multiply_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.multiply"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_negate" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_negate(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.negate_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.negate_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.negate"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_not" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_not(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.not_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.not_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.not"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_optimization_barrier" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_optimization_barrier(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.optimization_barrier_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.optimization_barrier_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.optimization_barrier"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_or" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_or(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.or_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.or_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.or"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_outfeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: outfeed_config = #vhlo.string_v1<"foo"> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.outfeed"(%arg0, %arg1) { -@@ -1605,8 +1725,9 @@ - } - - // CHECK-LABEL: "op_pad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_pad(%arg0: tensor<8xf32>, %arg1: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.pad_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.pad_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: edge_padding_high = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: edge_padding_low = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: interior_padding = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1620,36 +1741,41 @@ - } - - // CHECK-LABEL: "op_popcnt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_popcnt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.popcnt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.popcnt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.popcnt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_power" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_power(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: 
"vhlo.power_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.power_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.power"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_real_dynamic_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}) - func.func @op_real_dynamic_slice(%arg0: tensor, %arg1: tensor<1xindex>, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.real_dynamic_slice_v1"(%arg0, %arg1, %arg2, %arg3) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.real_dynamic_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.real_dynamic_slice"(%arg0, %arg1, %arg2, %arg3) : (tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_real" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_real(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.real_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.real_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.real"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_recv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.recv_v1"(%arg0) <{ -+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<3 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -1662,8 +1788,9 @@ - } - - // CHECK-LABEL: "op_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_reduce(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.reduce_v1"(%arg0, %arg1) -+ // CHECK: "vhlo.reduce_v1"(%[[ARG0]], %[[ARG1]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -1678,8 +1805,9 @@ - } - - // CHECK-LABEL: "op_reduce_precision" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reduce_precision(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.reduce_precision_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_precision_v1"(%[[ARG0]]) <{ - // CHECK-SAME: exponent_bits = #vhlo.integer_v1<8 : i32> - // CHECK-SAME: mantissa_bits = #vhlo.integer_v1<10 : i32> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -1693,7 +1821,7 @@ - // CHECK_lABEL: "op_reduce_with_promotable_types" - func.func @op_reduce_with_promotable_types(%arg0: tensor<4x4xf32>, %arg1 : tensor) - -> (tensor<4xf64>) { -- // CHECK: "vhlo.reduce_v1"(%arg0, %arg1) -+ // CHECK: "vhlo.reduce_v1"(%[[ARG0:.*]], %[[ARG1:.*]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1<4x4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<4x!vhlo.f64_v1> -@@ -1708,8 +1836,9 @@ - } - - // CHECK-LABEL: "op_reduce_scatter" -+// 
CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64> -@@ -1734,7 +1863,7 @@ - - // CHECK_lABEL: "op_reduce_scatter_with_promotable_types" - func.func @op_reduce_scatter_with_promotable_types(%data: tensor<4x16xf32>) -> tensor<4x4xf64> { -- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) -+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0:.*]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1<4x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f64_v1> -@@ -1751,8 +1880,9 @@ - - - // CHECK-LABEL: "op_reduce_window" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x9x16x7xf32> { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, -@@ -1777,11 +1907,11 @@ - func.return %0 : tensor<2x9x16x7xf32> - } - --// CHECK_lABEL: "op_reduce_window_with_promotable_types" -+// CHECK-LABEL: "op_reduce_window_with_promotable_types" - func.func @op_reduce_window_with_promotable_types(%arg0: tensor<4x2xf32>, - %arg1: tensor<4x2xf32>, %init0: tensor, %init1: tensor) -> - (tensor<2x2xf64>, tensor<2x2xf32>) { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1, %arg2, %arg3) -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0:.*]], %[[ARG1:.*]], %[[ARG2:.*]], %[[ARG3:.*]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1, %[[ARG3:arg.*]]: !vhlo.tensor_v1, %[[ARG4:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]], %[[VAL2:.*]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1<4x2x!vhlo.f32_v1>, !vhlo.tensor_v1<4x2x!vhlo.f32_v1>, !vhlo.tensor_v1, !vhlo.tensor_v1) -> (!vhlo.tensor_v1<2x2x!vhlo.f64_v1>, !vhlo.tensor_v1<2x2x!vhlo.f32_v1>) -@@ -1801,8 +1931,9 @@ - } - - // CHECK-LABEL: "op_remainder" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_remainder(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.remainder_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.remainder_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.remainder"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } -@@ -1822,16 +1953,18 @@ - } - - // CHECK-LABEL: "op_reshape" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reshape(%arg0: tensor<16xf32>) -> tensor<4x4xf32> { -- // CHECK: "vhlo.reshape_v1"(%arg0) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1> -+ // CHECK: "vhlo.reshape_v1"(%[[ARG0]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1> - %0 = "stablehlo.reshape"(%arg0) : (tensor<16xf32>) -> tensor<4x4xf32> - func.return %0 : tensor<4x4xf32> - } - - // CHECK-LABEL: "op_return" -+// CHECK-NEXT: 
(%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_return(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.case_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.case"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -1840,8 +1973,9 @@ - } - - // CHECK-LABEL: "op_reverse" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reverse(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reverse_v1"(%arg0) <{ -+ // CHECK: "vhlo.reverse_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.reverse"(%arg0) { -@@ -1851,8 +1985,9 @@ - } - - // CHECK-LABEL: "op_rng_bit_generator" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_rng_bit_generator(%arg0: tensor) -> (tensor, tensor) { -- // CHECK: "vhlo.rng_bit_generator_v1"(%arg0) <{ -+ // CHECK: "vhlo.rng_bit_generator_v1"(%[[ARG0]]) <{ - // CHECK-SAME: rng_algorithm = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> (!vhlo.tensor_v1, !vhlo.tensor_v1) - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { -@@ -1862,8 +1997,9 @@ - } - - // CHECK-LABEL: "op_rng" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_rng(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { -- // CHECK: "vhlo.rng_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.rng_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: rng_distribution = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<0x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) { -@@ -1873,29 +2009,33 @@ - } - - // CHECK-LABEL: "op_round_nearest_afz" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_round_nearest_afz(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.round_nearest_afz_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.round_nearest_afz_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.round_nearest_afz"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_round_nearest_even" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_round_nearest_even(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.round_nearest_even_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.round_nearest_even_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.round_nearest_even"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_rsqrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_rsqrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.rsqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.rsqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.rsqrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> { -- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: index_vector_dim = 
#vhlo.integer_v1<1 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, - // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>, -@@ -1928,7 +2068,7 @@ - func.func @op_scatter_with_promotable_types(%input_tensor: tensor<200x100x300xf32>, - %scatter_indices: tensor<10x2xi32>, %updates: tensor<10x300xf32>) -> - tensor<200x100x300xf64> { -- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) -+ // CHECK: "vhlo.scatter_v1"(%[[ARG0:.*]], %[[ARG1:.*]], %[[ARG2:.*]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1<200x100x300x!vhlo.f32_v1>, !vhlo.tensor_v1<10x2x!vhlo.i32_v1>, !vhlo.tensor_v1<10x300x!vhlo.f32_v1>) -> !vhlo.tensor_v1<200x100x300x!vhlo.f64_v1> -@@ -1951,8 +2091,9 @@ - } - - // CHECK-LABEL: "op_select_and_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<12x13x13x66xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> { -- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>> -@@ -1982,8 +2123,9 @@ - } - - // CHECK-LABEL: "op_select_and_scatter_with_promotable_types" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_select_and_scatter_with_promotable_types(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<12x13x13x66xf32>, %arg2: tensor) -> tensor<10x24x24x64xf64> { -- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) -+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: %[[VAL:.*]] = "vhlo.add_v1"(%[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - // CHECK: "vhlo.return_v1"(%[[VAL]]) : (!vhlo.tensor_v1) -> () -@@ -2005,15 +2147,17 @@ - } - - // CHECK-LABEL: "op_select" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_select(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.select_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.select_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.select"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_send" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -2026,8 +2170,9 @@ - } - - // CHECK-LABEL: "op_set_dimension_size" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_set_dimension_size(%arg0: tensor, %arg1: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.set_dimension_size_v1"(%arg0, %arg1) <{ -+ // 
CHECK: "vhlo.set_dimension_size_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.set_dimension_size"(%arg0, %arg1) { -@@ -2037,43 +2182,49 @@ - } - - // CHECK-LABEL: "op_shift_left" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_left(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.shift_left_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_left_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_left"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_shift_right_arithmetic" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_right_arithmetic(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.shift_right_arithmetic_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_right_arithmetic_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_right_arithmetic"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_shift_right_logical" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_right_logical(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.shift_right_logical_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_right_logical_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_right_logical"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_sign" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sign(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.sign_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.sign_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.sign"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_sine" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sine(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.sine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.sine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.sine"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_slice(%arg0: tensor<16xf32>) -> tensor<4xf32> { -- // CHECK: "vhlo.slice_v1"(%arg0) <{ -+ // CHECK: "vhlo.slice_v1"(%[[ARG0]]) <{ - // CHECK-SAME: limit_indices = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: start_indices = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: strides = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -2087,8 +2238,9 @@ - } - - // CHECK-LABEL: "op_sort" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.sort_v1"(%arg0) <{ -+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: is_stable = #vhlo.bool_v1 - // CHECK-SAME: }> ({ -@@ -2108,29 +2260,33 @@ - } - - // CHECK-LABEL: "op_sqrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sqrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.sqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 
-+ // CHECK: "vhlo.sqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.sqrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_subtract" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_subtract(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.subtract_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.subtract_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.subtract"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_tanh" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_tanh(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.tanh_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.tanh_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.tanh"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_torch_index_select" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_torch_index_select(%arg0: tensor<5x1x5xf32>, %arg1: tensor<2xi32>) -> tensor<2x1x5xf32> { -- // CHECK: "vhlo.torch_index_select_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.torch_index_select_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_dims = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: dim = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<5x1x5x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.i32_v1>) -> !vhlo.tensor_v1<2x1x5x!vhlo.f32_v1> -@@ -2142,8 +2298,9 @@ - } - - // CHECK-LABEL: "op_trace" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_trace(%arg0: tensor) { -- // CHECK: "vhlo.trace_v1"(%arg0) <{ -+ // CHECK: "vhlo.trace_v1"(%[[ARG0]]) <{ - // CHECK-SAME: tag = #vhlo.string_v1<"foo"> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> () - "stablehlo.trace"(%arg0) { -@@ -2153,8 +2310,9 @@ - } - - // CHECK-LABEL: "op_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_transpose(%arg0: tensor<16x8xf32>) -> tensor<8x16xf32> { -- // CHECK: "vhlo.transpose_v1"(%arg0) <{ -+ // CHECK: "vhlo.transpose_v1"(%[[ARG0]]) <{ - // CHECK-SAME: permutation = #vhlo.tensor_v1 : tensor<2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x16x!vhlo.f32_v1> - %0 = "stablehlo.transpose"(%arg0) { -@@ -2164,8 +2322,9 @@ - } - - // CHECK-LABEL: "op_triangular_solve" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_triangular_solve(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.triangular_solve_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.triangular_solve_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: left_side = #vhlo.bool_v1, - // CHECK-SAME: lower = #vhlo.bool_v1, - // CHECK-SAME: transpose_a = #vhlo, -@@ -2181,15 +2340,17 @@ - } - - // CHECK-LABEL: "op_tuple" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_tuple(%arg0: tensor) -> tuple> { -- // CHECK: "vhlo.tuple_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1> -+ // CHECK: "vhlo.tuple_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1> - %0 = "stablehlo.tuple"(%arg0) : (tensor) -> tuple> - func.return %0 : tuple> - } - - // CHECK-LABEL: "op_unary_einsum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_unary_einsum(%arg0: tensor<8x16xf32>) -> tensor<8xf32> { -- // CHECK: "vhlo.unary_einsum_v1"(%arg0) <{ -+ // CHECK: "vhlo.unary_einsum_v1"(%[[ARG0]]) <{ - // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab->a"> - // 
CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x!vhlo.f32_v1> - %0 = "stablehlo.unary_einsum"(%arg0) { -@@ -2199,22 +2360,25 @@ - } - - // CHECK-LABEL: "op_uniform_dequantize" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_uniform_dequantize(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.uniform_dequantize_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.uniform_dequantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.uniform_dequantize"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_uniform_quantize" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_uniform_quantize(%arg0: tensor) -> tensor> { -- // CHECK: "vhlo.uniform_quantize_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.uniform_quantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1> - %0 = "stablehlo.uniform_quantize"(%arg0) : (tensor) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "op_while" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_while(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.while_v1"(%arg0) ({ -+ // CHECK: "vhlo.while_v1"(%[[ARG0]]) ({ - // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1): - // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }, { -@@ -2232,8 +2396,9 @@ - } - - // CHECK-LABEL: "op_xor" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_xor(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.xor_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.xor_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.xor"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } -@@ -2241,197 +2406,225 @@ - // ============ TYPES ============ - - // CHECK-LABEL: "type_i1" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i1(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i4" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i4(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i8" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i8(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], 
%[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i32(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i64(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui4" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui4(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui8" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui8(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui32(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui64(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E4M3FN" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E4M3FN(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, 
%arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E5M2" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E5M2(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E4M3FNUZ" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E4M3FNUZ(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E4M3B11FNUZ" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E4M3B11FNUZ(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E5M2FNUZ" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E5M2FNUZ(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_bf16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_bf16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f32(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f64" -+// CHECK-NEXT: 
(%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f64(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_complex_f32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_complex_f32(%arg0: tensor>, %arg1: tensor>) -> tensor> { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "type_complex_f64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_complex_f64(%arg0: tensor>, %arg1: tensor>) -> tensor> { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "type_dynamism_ranked" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_dynamism_ranked(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_per_tensor_quantization" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_per_tensor_quantization(%arg0: tensor>, %arg1: tensor>) -> tensor> { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "type_per_axis_quantization" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_per_axis_quantization(%arg0: tensor<2x!quant.uniform>) -> tensor<2x!quant.uniform> { -- // CHECK: "vhlo.add_v1"(%arg0, %arg0) : (!vhlo.tensor_v1<2x!vhlo.quant_per_axis_v1>, !vhlo.tensor_v1<2x!vhlo.quant_per_axis_v1>) -> !vhlo.tensor_v1<2x!vhlo.quant_per_axis_v1> -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG0]]) : (!vhlo.tensor_v1<2x!vhlo.quant_per_axis_v1>, !vhlo.tensor_v1<2x!vhlo.quant_per_axis_v1>) -> !vhlo.tensor_v1<2x!vhlo.quant_per_axis_v1> - %0 = stablehlo.add %arg0, %arg0 : tensor<2x!quant.uniform> - func.return %0 : tensor<2x!quant.uniform> - } - - // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>> - // CHECK-LABEL: "type_token_callee" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_token_callee(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.token_v1) -> () -+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> () - return %arg0 : !stablehlo.token - } - - // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>> - // CHECK-LABEL: "type_token_caller" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_token_caller(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: 
"vhlo.call_v1"(%arg0) <{callee = #vhlo.string_v1<"type_token_callee">} -+ // CHECK: "vhlo.call_v1"(%[[ARG0]]) <{callee = #vhlo.string_v1<"type_token_callee">} - // CHECK-SAME: (!vhlo.token_v1) -> !vhlo.token_v1 - %0 = func.call @type_token_callee(%arg0) : (!stablehlo.token) -> !stablehlo.token - return %0 : !stablehlo.token - } - - // CHECK-LABEL: "type_tuple" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_tuple(%arg0: tuple>) -> tuple { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo" diff --git a/third_party/stablehlo/workspace.bzl b/third_party/stablehlo/workspace.bzl index 6a72c8fa16885c..aaef166d96583c 100644 --- a/third_party/stablehlo/workspace.bzl +++ b/third_party/stablehlo/workspace.bzl @@ -4,8 +4,8 @@ load("//third_party:repo.bzl", "tf_http_archive", "tf_mirror_urls") def repo(): # LINT.IfChange - STABLEHLO_COMMIT = "8ba7728d3fdc3ea882e893ee7e53255c95ee0e5a" - STABLEHLO_SHA256 = "1dfc7179dc9200c3ab4ea85edbac4a35393866d8cd8694fcaac00c1d27036408" + STABLEHLO_COMMIT = "797bee217e1a041e9aac22cad4db207274596d94" + STABLEHLO_SHA256 = "e5619033e131ea2eeb9eab8c8e362f3ba12e111c6b4a15dac789ca216ff22c58" # LINT.ThenChange(Google-internal path) tf_http_archive( diff --git a/third_party/xla/third_party/stablehlo/temporary.patch b/third_party/xla/third_party/stablehlo/temporary.patch index f906a856ae69e1..abb329aec579e4 100755 --- a/third_party/xla/third_party/stablehlo/temporary.patch +++ b/third_party/xla/third_party/stablehlo/temporary.patch @@ -164,81 +164,6 @@ diff --ruN a/stablehlo/CMakeLists.txt b/stablehlo/CMakeLists.txt #------------------------------------------------------------------------------- # Directory setup -diff --ruN a/stablehlo/docs/spec.md b/stablehlo/docs/spec.md ---- stablehlo/docs/spec.md -+++ stablehlo/docs/spec.md -@@ -2532,10 +2532,10 @@ - rhs_batching_dimensions, lhs_contracting_dimensions, - rhs_contracting_dimensions, precision_config), lhs, rhs, type(result))`. - --For hybrid quantized types, performs `hybrid_dequantize_then_op( -- lambda lhs, rhs: dot_general(lhs, rhs, lhs_batching_dimensions, -- rhs_batching_dimensions, lhs_contracting_dimensions, -- rhs_contracting_dimensions, precision_config), lhs, rhs)`. -+This only specifies semantics for per-tensor quantization. Per-axis quantization -+is work in progress ([#1574](https://github.com/openxla/stablehlo/issues/1574)). -+Also, in the future we may consider adding support for hybrid quantization -+ ([#1575](https://github.com/openxla/stablehlo/issues/1575)). - - `precision_config` controls the tradeoff between speed and accuracy for - computations on accelerator backends. 
This can be one of the following (at the -@@ -2552,21 +2552,21 @@ - - #### Inputs - --| Label | Name | Type | Constraints | --|-------|------------------------------|--------------------------------------------------------------|------------------------------------------------| --| (I1) | `lhs` | tensor or per-tensor quantized tensor | (C5-C6), (C9-C10), (C12-C14), (C17-C18), (C20) | --| (I2) | `rhs` | tensor or quantized tensor | (C7-C10), (C12-C20) | --| (I3) | `lhs_batching_dimensions` | 1-dimensional tensor constant of type `si64` | (C1), (C3), (C5), (C9), (C12) | --| (I4) | `rhs_batching_dimensions` | 1-dimensional tensor constant of type `si64` | (C1), (C4), (C7), (C9) | --| (I5) | `lhs_contracting_dimensions` | 1-dimensional tensor constant of type `si64` | (C2), (C3), (C6), (C10) | --| (I6) | `rhs_contracting_dimensions` | 1-dimensional tensor constant of type `si64` | (C2), (C4), (C8), (C10), (C16) | --| (I7) | `precision_config` | variadic number of enums of `DEFAULT`, `HIGH`, and `HIGHEST` | (C11) | -+| Label | Name | Type | Constraints | -+|-------|------------------------------|--------------------------------------------------------------|--------------------------------| -+| (I1) | `lhs` | tensor or per-tensor quantized tensor | (C5-C6), (C9-C10), (C12-C16) | -+| (I2) | `rhs` | tensor or quantized tensor | (C7-C10), (C12), (C18-C19) | -+| (I3) | `lhs_batching_dimensions` | 1-dimensional tensor constant of type `si64` | (C1), (C3), (C5), (C9), (C12) | -+| (I4) | `rhs_batching_dimensions` | 1-dimensional tensor constant of type `si64` | (C1), (C4), (C7), (C9) | -+| (I5) | `lhs_contracting_dimensions` | 1-dimensional tensor constant of type `si64` | (C2), (C3), (C6), (C10) | -+| (I6) | `rhs_contracting_dimensions` | 1-dimensional tensor constant of type `si64` | (C2), (C4), (C8), (C10), (C19) | -+| (I7) | `precision_config` | variadic number of enums of `DEFAULT`, `HIGH`, and `HIGHEST` | (C11) | - - #### Outputs - - | Name | Type | Constraints | - |----------|----------------------------|----------------------------| --| `result` | tensor or quantized tensor | (C12), (C14), (C18-C20) | -+| `result` | tensor or quantized tensor | (C12), (C14), (C16), (C18) | - - #### Constraints - -@@ -2589,17 +2589,14 @@ - * If the operation uses non-quantized tensors: - * (C13) `element_type(lhs) = element_type(rhs)`. - * If the operation uses quantized tensors: -- * (C14) `is_quantized(lhs) = is_quantized(result) and is_quantized(rhs)`. -- * (C15) `zero_points(rhs) = 0`. -- * (C16) If `is_per_axis_quantized(rhs)`, then -+ * (C14) `is_quantized(lhs) and is_quantized(rhs) and is_quantized(result)`. -+ * (C15) `storage_type(lhs) = storage_type(rhs)`. -+ * (C16) `expressed_type(lhs) = expressed_type(rhs) = expressed_type(result)`. -+ * (C17) `zero_points(rhs) = 0`. -+ * (C18) If `is_per_tensor_quantized(rhs)`, then -+ `is_per_tensor_quantized(result)`. -+ * (C19) If `is_per_axis_quantized(rhs)`, then - `quantization_dimension(rhs)` not in `rhs_contracting_dimensions`. -- * If `is_quantized(lhs)`: -- * (C17) `storage_type(lhs) = storage_type(rhs)`. -- * (C18) `expressed_type(lhs) = expressed_type(rhs) = expressed_type(result)`. -- * (C19) If `is_per_tensor_quantized(rhs)`, then -- `is_per_tensor_quantized(result)`. -- * If `!is_quantized(lhs)`: -- * (C20) `element_type(lhs) = expressed_type(rhs) = element_type(result)`. 
- - #### Examples - diff --ruN a/stablehlo/stablehlo/CMakeLists.txt b/stablehlo/stablehlo/CMakeLists.txt --- stablehlo/stablehlo/CMakeLists.txt +++ stablehlo/stablehlo/CMakeLists.txt @@ -250,118 +175,10 @@ diff --ruN a/stablehlo/stablehlo/CMakeLists.txt b/stablehlo/stablehlo/CMakeLists add_subdirectory(integrations) add_subdirectory(reference) add_subdirectory(tests) -diff --ruN a/stablehlo/stablehlo/conversions/linalg/tests/convolution.mlir b/stablehlo/stablehlo/conversions/linalg/tests/convolution.mlir ---- stablehlo/stablehlo/conversions/linalg/tests/convolution.mlir -+++ stablehlo/stablehlo/conversions/linalg/tests/convolution.mlir -@@ -356,7 +356,7 @@ - } - // CHECK-DAG: %[[CST:.+]] = arith.constant 0.000000e+00 : f32 - // CHECK: %[[COLLAPSE:.+]] = tensor.collapse_shape %[[FILTER]] {{\[}}[0, 1, 2, 3]] : tensor<2x2x1x6xf32> into tensor<24xf32> --// CHECK: %[[EXPAND:.+]] = tensor.expand_shape %[[COLLAPSE]] {{\[}}[0, 1, 2, 3]] : tensor<24xf32> into tensor<2x2x2x3xf32> -+// CHECK: %[[EXPAND:.+]] = tensor.expand_shape %[[COLLAPSE]] {{\[}}[0, 1, 2, 3]] output_shape [2, 2, 2, 3] : tensor<24xf32> into tensor<2x2x2x3xf32> - // CHECK: %[[INIT:.+]] = tensor.empty() : tensor<2x3x4x2x3xf32> - // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST]] : f32) outs(%[[INIT]] : tensor<2x3x4x2x3xf32>) -> tensor<2x3x4x2x3xf32> - // CHECK: %[[OUT:.+]] = linalg.depthwise_conv_2d_nhwc_hwcm -diff --ruN a/stablehlo/stablehlo/conversions/linalg/tests/miscellaneous.mlir b/stablehlo/stablehlo/conversions/linalg/tests/miscellaneous.mlir ---- stablehlo/stablehlo/conversions/linalg/tests/miscellaneous.mlir -+++ stablehlo/stablehlo/conversions/linalg/tests/miscellaneous.mlir -@@ -865,7 +865,7 @@ - %0 = "stablehlo.reshape"(%arg0) : (tensor) -> tensor<1xi32> - func.return %0 : tensor<1xi32> - } --// CHECK: tensor.expand_shape %{{.*}} [] : tensor into tensor<1xi32> -+// CHECK: tensor.expand_shape %{{.*}} [] output_shape [1] : tensor into tensor<1xi32> - - // ----- - -@@ -876,7 +876,7 @@ - func.return %0 : tensor<1xui32> - } - // CHECK: %[[ARG_SIGNLESS:.*]] = builtin.unrealized_conversion_cast %[[ARG_UNSIGNED]] : tensor to tensor --// CHECK: %[[RET_SIGNLESS:.*]] = tensor.expand_shape %[[ARG_SIGNLESS]] [] : tensor into tensor<1xi32> -+// CHECK: %[[RET_SIGNLESS:.*]] = tensor.expand_shape %[[ARG_SIGNLESS]] [] output_shape [1] : tensor into tensor<1xi32> - // CHECK: %[[RET_UNSIGNED:.*]] = builtin.unrealized_conversion_cast %[[RET_SIGNLESS]] : tensor<1xi32> to tensor<1xui32> - // CHECK: return %[[RET_UNSIGNED]] : tensor<1xui32> - -@@ -978,7 +978,7 @@ - } - // CHECK: %[[FLATTEN:.*]] = tensor.collapse_shape %{{.*}} {{\[}}[0, 1]] : tensor into tensor - // CHECK: %[[CAST:.*]] = tensor.cast %[[FLATTEN]] : tensor to tensor<40xf32> --// CHECK: tensor.expand_shape %[[CAST]] {{\[}}[0, 1, 2]] : tensor<40xf32> into tensor<2x4x5xf32> -+// CHECK: tensor.expand_shape %[[CAST]] {{\[}}[0, 1, 2]] output_shape [2, 4, 5] : tensor<40xf32> into tensor<2x4x5xf32> - - // ----- - -@@ -988,7 +988,7 @@ - func.return %0 : tensor<1x3xi32> - } - // CHECK: %[[CAST:.*]] = tensor.cast %{{.*}} : tensor to tensor<3xi32> --// CHECK: tensor.expand_shape %[[CAST]] {{\[}}[0, 1]] : tensor<3xi32> into tensor<1x3xi32> -+// CHECK: tensor.expand_shape %[[CAST]] {{\[}}[0, 1]] output_shape [1, 3] : tensor<3xi32> into tensor<1x3xi32> - - // ----- - -diff --ruN a/stablehlo/stablehlo/conversions/linalg/tests/random.mlir b/stablehlo/stablehlo/conversions/linalg/tests/random.mlir ---- stablehlo/stablehlo/conversions/linalg/tests/random.mlir -+++ 
stablehlo/stablehlo/conversions/linalg/tests/random.mlir -@@ -480,8 +480,8 @@ - // CHECK-DAG: %[[VAL_101:.*]] = arith.xori %[[VAL_100]], %[[VAL_87]] : i32 - - // CHECK: linalg.yield %[[YIELDED_1:.*]], %[[YIELDED_2:.*]] : i64, i64 --// CHECK-DAG: %[[VAL_206:.*]] = tensor.expand_shape %[[VAL_207:.*]]#0 {{\[\[}}0, 1]] : tensor<4xi64> into tensor<4x1xi64> --// CHECK-DAG: %[[VAL_208:.*]] = tensor.expand_shape %[[VAL_207]]#1 {{\[\[}}0, 1]] : tensor<4xi64> into tensor<4x1xi64> -+// CHECK-DAG: %[[VAL_206:.*]] = tensor.expand_shape %[[VAL_207:.*]]#0 {{\[\[}}0, 1]] -+// CHECK-DAG: %[[VAL_208:.*]] = tensor.expand_shape %[[VAL_207]]#1 {{\[\[}}0, 1]] - // CHECK-DAG: %[[VAL_209:.*]] = tensor.empty() : tensor<4x2xi64> - // CHECK-DAG: %[[VAL_213:.*]] = tensor.insert %[[VAL_30]] into %[[VAL_0]]{{\[}}%[[VAL_19]]] : tensor<2xi64> - -@@ -575,10 +575,10 @@ - // CHECK: %[[COLLAPSE:.+]] = tensor.collapse_shape %[[CONCAT]] - - --// CHECK: %[[VAL_213:.*]] = tensor.expand_shape %[[COLLAPSE]] {{\[\[}}0, 1]] : tensor<80xi32> into tensor<80x1xi32> -+// CHECK: %[[VAL_213:.*]] = tensor.expand_shape %[[COLLAPSE]] {{\[\[}}0, 1]] - // CHECK: %[[VAL_214:.*]] = tensor.extract_slice %[[VAL_213]][0, 0] [77, 1] [1, 1] : tensor<80x1xi32> to tensor<77x1xi32> - // CHECK: %[[VAL_215:.*]] = tensor.collapse_shape %[[VAL_214]] {{\[\[}}0, 1]] : tensor<77x1xi32> into tensor<77xi32> --// CHECK: %[[VAL_216:.*]] = tensor.expand_shape %[[VAL_215]] {{\[\[}}0, 1]] : tensor<77xi32> into tensor<7x11xi32> -+// CHECK: %[[VAL_216:.*]] = tensor.expand_shape %[[VAL_215]] {{\[\[}}0, 1]] - // CHECK: %[[VAL_217:.*]] = tensor.insert %[[NEWSTATE]] into %[[ARG0]]{{\[}}%[[C1]]] : tensor<2xi64> - // CHECK: return %[[VAL_217]], %[[VAL_216]] : tensor<2xi64>, tensor<7x11xi32> - -@@ -616,10 +616,10 @@ - // CHECK-DAG: %[[COLLAPSE:.+]] = tensor.collapse_shape %[[CONCAT]] {{\[\[}}0, 1]] : tensor<8x2xi64> into tensor<16xi64> - - --// CHECK-DAG: %[[EXPANDED:.*]] = tensor.expand_shape %[[COLLAPSE]] {{\[\[}}0, 1]] : tensor<16xi64> into tensor<16x1xi64> -+// CHECK-DAG: %[[EXPANDED:.*]] = tensor.expand_shape %[[COLLAPSE]] {{\[\[}}0, 1]] - // CHECK-DAG: %[[SLICE:.*]] = tensor.extract_slice %[[EXPANDED]][0, 0] [15, 1] [1, 1] : tensor<16x1xi64> to tensor<15x1xi64> - // CHECK-DAG: %[[EXPAND_2:.*]] = tensor.collapse_shape %[[SLICE]] {{\[\[}}0, 1]] : tensor<15x1xi64> into tensor<15xi64> --// CHECK-DAG: %[[RESHAPE:.*]] = tensor.expand_shape %[[EXPAND_2]] {{\[\[}}0, 1]] : tensor<15xi64> into tensor<3x5xi64> -+// CHECK-DAG: %[[RESHAPE:.*]] = tensor.expand_shape %[[EXPAND_2]] {{\[\[}}0, 1]] - // CHECK-DAG: %[[INSERTED:.+]] = tensor.insert %[[NEWSTATE]] into %[[ARG0]][%[[C1]]] : tensor<2xi64> - // CHECK: return %[[INSERTED]], %[[RESHAPE]] - diff --ruN a/stablehlo/stablehlo/conversions/tosa/tests/binary.mlir b/stablehlo/stablehlo/conversions/tosa/tests/binary.mlir --- stablehlo/stablehlo/conversions/tosa/tests/binary.mlir +++ stablehlo/stablehlo/conversions/tosa/tests/binary.mlir -@@ -45,14 +45,14 @@ - - // CHECK-LABEL: @divide - func.func @divide(%arg0 : tensor<10xi32>, %arg1 : tensor<10xi32>) -> tensor<10xi32> { -- // CHECK: tosa.div -+ // CHECK: tosa.int_div - %0 = "stablehlo.divide"(%arg0, %arg1) : (tensor<10xi32>, tensor<10xi32>) -> tensor<10xi32> - return %0 : tensor<10xi32> - } - - // CHECK-LABEL: @divide_f32 - func.func @divide_f32(%arg0 : tensor<10xf32>, %arg1 : tensor<10xf32>) -> tensor<10xf32> { -- // tosa.div only supports i32, so this should not legalize. -+ // tosa.int_div only supports i32, so this should not legalize. 
- // CHECK: stablehlo.divide - %0 = "stablehlo.divide"(%arg0, %arg1) : (tensor<10xf32>, tensor<10xf32>) -> tensor<10xf32> - return %0 : tensor<10xf32> -@@ -123,7 +123,7 @@ +@@ -155,7 +155,7 @@ // CHECK-LABEL: @maximum_f64 func.func @maximum_f64(%arg0 : tensor<10xf64>, %arg1 : tensor<10xf64>) -> tensor<10xf64> { @@ -383,162 +200,6 @@ diff --ruN a/stablehlo/stablehlo/conversions/tosa/tests/nullary.mlir b/stablehlo %0 = stablehlo.constant dense<0.000000e+00> : tensor<10xf64> return %0 : tensor<10xf64> } -diff --ruN a/stablehlo/stablehlo/conversions/tosa/transforms/StablehloLegalizeToTosa.pdll b/stablehlo/stablehlo/conversions/tosa/transforms/StablehloLegalizeToTosa.pdll ---- stablehlo/stablehlo/conversions/tosa/transforms/StablehloLegalizeToTosa.pdll -+++ stablehlo/stablehlo/conversions/tosa/transforms/StablehloLegalizeToTosa.pdll -@@ -125,7 +125,7 @@ - Pattern => - replace op(input0 : Value<_: Tosa_Int32Tensor>, - input1 : Value<_: Tosa_Int32Tensor>) -- with op(input0, input1); -+ with op(input0, input1); - Pattern => - replace op(input0 : Value<_: Tosa_Tensor>, - input1 : Value<_: Tosa_Tensor>) -diff --ruN a/stablehlo/stablehlo/dialect/Base.cpp b/stablehlo/stablehlo/dialect/Base.cpp ---- stablehlo/stablehlo/dialect/Base.cpp -+++ stablehlo/stablehlo/dialect/Base.cpp -@@ -651,14 +651,14 @@ - - // quantized_type_c5 - auto maxPosFiniteNum = -- APFloat::getLargest(quantizedElementType.getExpressedType() -- .cast() -- .getFloatSemantics()) -+ APFloat::getLargest( -+ cast(quantizedElementType.getExpressedType()) -+ .getFloatSemantics()) - .convertToDouble(); - auto minPosFiniteNum = -- APFloat::getSmallest(quantizedElementType.getExpressedType() -- .cast() -- .getFloatSemantics()) -+ APFloat::getSmallest( -+ cast(quantizedElementType.getExpressedType()) -+ .getFloatSemantics()) - .convertToDouble(); - if (llvm::any_of(scales, [&](double scale) { - return scale < minPosFiniteNum || scale > maxPosFiniteNum; -diff --ruN a/stablehlo/stablehlo/dialect/TypeInference.cpp b/stablehlo/stablehlo/dialect/TypeInference.cpp ---- stablehlo/stablehlo/dialect/TypeInference.cpp -+++ stablehlo/stablehlo/dialect/TypeInference.cpp -@@ -171,7 +171,7 @@ - LogicalResult verifyConvolutionDotGeneralCommonQuantizationConstraints( - std::optional location, Type lhsElementType, Type rhsElementType, - Type resultElementType) { -- // convolution_c28 and dot_general_c14 -+ // convolution_c28 - if (!isa(rhsElementType) || - (isa(lhsElementType) != - isa(resultElementType))) { -@@ -184,19 +184,19 @@ - auto rhsQuantType = cast(rhsElementType); - if (auto lhsQuantType = dyn_cast(lhsElementType)) { - auto resultQuantType = cast(resultElementType); -- // convolution_c31 and dot_general_c17 -+ // convolution_c31 - if (lhsQuantType.getStorageType() != rhsQuantType.getStorageType()) { - return emitOptionalError( - location, "mismatched lhs and rhs quantization storage types"); - } -- // convolution_c32 and dot_general_c18 -+ // convolution_c32 - if (lhsQuantType.getExpressedType() != rhsQuantType.getExpressedType() || - lhsQuantType.getExpressedType() != resultQuantType.getExpressedType()) { - return emitOptionalError( - location, - "mismatched lhs, rhs and result quantization expressed types"); - } -- // convolution_c33 and dot_general_c19 -+ // convolution_c33 - if (isa(rhsQuantType) && - !isa(resultQuantType)) { - return emitOptionalError( -@@ -204,7 +204,7 @@ - } - } else { - Type rhsExpressedType = rhsQuantType.getExpressedType(); -- // convolution_c34 and dot_general_c20 -+ // convolution_c34 - if (lhsElementType != 
rhsExpressedType || - lhsElementType != resultElementType) { - return emitOptionalError(location, -@@ -3559,7 +3559,7 @@ - } - } - -- // convolution_c28, convolution_c31 - convolution_c34 -+ // convolution_c31 - convolution_c34 - return verifyConvolutionDotGeneralCommonQuantizationConstraints( - location, lhsElementType, rhsElementType, resultElementType); - } -@@ -3626,41 +3626,6 @@ - return success(); - } - --LogicalResult verifyDotGeneralOpQuantizationConstraints( -- std::optional location, Type lhsType, Type rhsType, -- Type resultType, ArrayRef rhsContractingDimensions) { -- Type lhsElementType = getElementTypeOrSelf(lhsType); -- Type rhsElementType = getElementTypeOrSelf(rhsType); -- Type resultElementType = getElementTypeOrSelf(resultType); -- -- // dot_general_c15 -- if (auto rhsPerTensorQuantType = -- dyn_cast(rhsElementType)) { -- if (rhsPerTensorQuantType.getZeroPoint() != 0) { -- return emitOptionalError(location, "Zero point of rhs should be 0"); -- } -- } else if (auto rhsPerAxisQuantType = -- dyn_cast(rhsElementType)) { -- if (llvm::any_of(rhsPerAxisQuantType.getZeroPoints(), -- [](int64_t zero_point) { return zero_point != 0; })) { -- return emitOptionalError(location, "Zero points of rhs should be 0"); -- } -- -- // dot_general_c16 -- if (llvm::is_contained(rhsContractingDimensions, -- rhsPerAxisQuantType.getQuantizedDimension())) { -- return emitOptionalError( -- location, -- "Quantization dimension of rhs should not be in the " -- "contracting dimension of rhs"); -- } -- } -- -- // dot_general_c14, dot_general_c17 - dot_general_c20 -- return verifyConvolutionDotGeneralCommonQuantizationConstraints( -- location, lhsElementType, rhsElementType, resultElementType); --} -- - LogicalResult verifyDotGeneralOp(std::optional location, Value lhs, - Value rhs, - ArrayRef lhsBatchingDimensions, -@@ -3683,13 +3648,6 @@ - return emitOptionalError( - location, "inferred shape '", dimSizesToString(inferredShape.getDims()), - "' ", "is incompatible with return type of operation ", resultType, ""); -- -- Type lhsType = lhs.getType(); -- Type rhsType = rhs.getType(); -- if (anyQuantized({lhsType, rhsType, resultType})) { -- return verifyDotGeneralOpQuantizationConstraints( -- location, lhsType, rhsType, resultType, rhsContractingDimensions); -- } - return success(); - } - -@@ -3861,8 +3819,8 @@ - if (SmallVector shape; operandType.hasStaticShape() && - matchInts(outputShape, shape).succeeded()) { - int64_t operandCount = operandType.getNumElements(); -- int64_t shapeCount = std::accumulate(shape.begin(), shape.end(), 1, -- std::multiplies()); -+ int64_t shapeCount = std::accumulate( -+ shape.begin(), shape.end(), int64_t{1}, std::multiplies()); - if (operandCount != shapeCount) { - return emitOptionalError(location, - "output_shape is incompatible with input type " diff --ruN a/stablehlo/stablehlo/experimental/BUILD.bazel b/stablehlo/stablehlo/experimental/BUILD.bazel --- stablehlo/stablehlo/experimental/BUILD.bazel +++ stablehlo/stablehlo/experimental/BUILD.bazel @@ -2928,23052 +2589,4 @@ diff --ruN a/stablehlo/stablehlo/experimental/transforms/StablehloRefineShapes.c +} // namespace experimental +} // namespace stablehlo +} // namespace mlir -diff --ruN a/stablehlo/stablehlo/integrations/python/tests/stablehlo.py b/stablehlo/stablehlo/integrations/python/tests/stablehlo.py ---- stablehlo/stablehlo/integrations/python/tests/stablehlo.py -+++ stablehlo/stablehlo/integrations/python/tests/stablehlo.py -@@ -241,18 +241,18 @@ - # Formatted as (tensor_type, np_value) - # Program runs arg + 
arg, which is used for expected value - tests = [ -- # No numpy types for f8 - skipping fp8 tests -- ("f16", np.asarray(1, np.float16)), -- ("f32", np.asarray(2, np.float32)), -- ("f64", np.asarray(3, np.double)), -- ("1xi8", np.asarray([4], np.int8)), -- ("1xi16", np.asarray([5], np.int16)), -- ("1xi32", np.asarray([-6], np.int32)), -- # Numpy's uint treated as int by DenseElementsAttr, skipping np.uint tests -- ("2x2xf16", np.asarray([1, 2, 3, 4], np.float16).reshape(2,2)), -- ("2x1x2xf16", np.asarray([1, 2, 3, 4], np.float16).reshape(2,1,2)), -- ("?x?xf16", np.asarray([1, 2, 3, 4], np.float16).reshape(2,2)), -- ("?x2xf16", np.asarray([1, 2, 3, 4], np.float16).reshape(2,2)), -+ # No numpy types for f8 - skipping fp8 tests -+ ("f16", np.asarray(1, np.float16)), -+ ("f32", np.asarray(2, np.float32)), -+ ("f64", np.asarray(3, np.double)), -+ ("1xi8", np.asarray([4], np.int8)), -+ ("1xi16", np.asarray([5], np.int16)), -+ ("1xi32", np.asarray([-6], np.int32)), -+ # Numpy's uint treated as int by DenseElementsAttr, skipping np.uint tests -+ ("2x2xf16", np.asarray([1, 2, 3, 4], np.float16).reshape(2, 2)), -+ ("2x1x2xf16", np.asarray([1, 2, 3, 4], np.float16).reshape(2, 1, 2)), -+ ("?x?xf16", np.asarray([1, 2, 3, 4], np.float16).reshape(2, 2)), -+ ("?x2xf16", np.asarray([1, 2, 3, 4], np.float16).reshape(2, 2)), - ] - for test in tests: - tensor_type, arg = test -diff --ruN a/stablehlo/stablehlo/tests/ops_stablehlo_quantized.mlir b/stablehlo/stablehlo/tests/ops_stablehlo_quantized.mlir ---- stablehlo/stablehlo/tests/ops_stablehlo_quantized.mlir -+++ stablehlo/stablehlo/tests/ops_stablehlo_quantized.mlir -@@ -1066,146 +1066,6 @@ - - // ----- - --func.func @dot_general_hybrid_quantized(%arg0: tensor<2x3x4xf32>, %arg1: tensor<2x3x5x!quant.uniform>) -> tensor<2x4x5xf32> { -- %0 = "stablehlo.dot_general"(%arg0, %arg1) { -- dot_dimension_numbers = #stablehlo.dot< -- lhs_batching_dimensions = [0], -- rhs_batching_dimensions = [0], -- lhs_contracting_dimensions = [1], -- rhs_contracting_dimensions = [1] -- > -- } : (tensor<2x3x4xf32>, tensor<2x3x5x!quant.uniform>) -> tensor<2x4x5xf32> -- func.return %0 : tensor<2x4x5xf32> --} -- --// ----- -- --func.func @dot_general_c14(%arg0: tensor<2x3x4xf32>, %arg1: tensor<2x3x5x!quant.uniform>) -> tensor<2x4x5x!quant.uniform> { -- // expected-error@+1 {{rhs should be quantized for quantized operations and is_quantized(lhs)=is_quantized(result) should hold}} -- %0 = "stablehlo.dot_general"(%arg0, %arg1) { -- dot_dimension_numbers = #stablehlo.dot< -- lhs_batching_dimensions = [0], -- rhs_batching_dimensions = [0], -- lhs_contracting_dimensions = [1], -- rhs_contracting_dimensions = [1] -- > -- } : (tensor<2x3x4xf32>, tensor<2x3x5x!quant.uniform>) -> tensor<2x4x5x!quant.uniform> -- func.return %0 : tensor<2x4x5x!quant.uniform> --} -- --// ----- -- --func.func @dot_general_c15_per_tensor(%arg0: tensor<2x3x4xf32>, %arg1: tensor<2x3x5x!quant.uniform>) -> tensor<2x4x5xf32> { -- // expected-error@+1 {{Zero point of rhs should be 0}} -- %0 = "stablehlo.dot_general"(%arg0, %arg1) { -- dot_dimension_numbers = #stablehlo.dot< -- lhs_batching_dimensions = [0], -- rhs_batching_dimensions = [0], -- lhs_contracting_dimensions = [1], -- rhs_contracting_dimensions = [1] -- > -- } : (tensor<2x3x4xf32>, tensor<2x3x5x!quant.uniform>) -> tensor<2x4x5xf32> -- func.return %0 : tensor<2x4x5xf32> --} -- --// ----- -- --func.func @dot_general_c15_per_axis( -- %arg0: tensor<2x3x4x!quant.uniform>, -- %arg1: tensor<2x3x5x!quant.uniform>) -> tensor<2x4x5x!quant.uniform> { -- // expected-error@+1 
{{Zero points of rhs should be 0}} -- %0 = "stablehlo.dot_general"(%arg0, %arg1) { -- dot_dimension_numbers = #stablehlo.dot< -- lhs_batching_dimensions = [0], -- rhs_batching_dimensions = [0], -- lhs_contracting_dimensions = [1], -- rhs_contracting_dimensions = [1] -- > -- } : (tensor<2x3x4x!quant.uniform>, -- tensor<2x3x5x!quant.uniform>) -> tensor<2x4x5x!quant.uniform> -- func.return %0 : tensor<2x4x5x!quant.uniform> --} -- --// ----- -- --func.func @dot_general_c16( -- %arg0: tensor<2x3x4x!quant.uniform>, -- %arg1: tensor<2x3x5x!quant.uniform>) -> tensor<2x4x5x!quant.uniform> { -- // expected-error@+1 {{Quantization dimension of rhs should not be in the contracting dimension of rhs}} -- %0 = "stablehlo.dot_general"(%arg0, %arg1) { -- dot_dimension_numbers = #stablehlo.dot< -- lhs_batching_dimensions = [1], -- rhs_batching_dimensions = [1], -- lhs_contracting_dimensions = [0], -- rhs_contracting_dimensions = [0] -- > -- } : (tensor<2x3x4x!quant.uniform>, -- tensor<2x3x5x!quant.uniform>) -> tensor<3x4x5x!quant.uniform> -- func.return %0 : tensor<3x4x5x!quant.uniform> --} -- --// ----- -- --func.func @dot_general_c17(%arg0: tensor<2x3x4x!quant.uniform>, %arg1: tensor<2x3x5x!quant.uniform>) -> tensor<2x4x5x!quant.uniform> { -- // expected-error@+1 {{mismatched lhs and rhs quantization storage types}} -- %0 = "stablehlo.dot_general"(%arg0, %arg1) { -- dot_dimension_numbers = #stablehlo.dot< -- lhs_batching_dimensions = [0], -- rhs_batching_dimensions = [0], -- lhs_contracting_dimensions = [1], -- rhs_contracting_dimensions = [1] -- > -- } : (tensor<2x3x4x!quant.uniform>, tensor<2x3x5x!quant.uniform>) -> tensor<2x4x5x!quant.uniform> -- func.return %0 : tensor<2x4x5x!quant.uniform> --} -- --// ----- -- --func.func @dot_general_c18(%arg0: tensor<2x3x4x!quant.uniform>, %arg1: tensor<2x3x5x!quant.uniform>) -> tensor<2x4x5x!quant.uniform> { -- // expected-error@+1 {{mismatched lhs, rhs and result quantization expressed types}} -- %0 = "stablehlo.dot_general"(%arg0, %arg1) { -- dot_dimension_numbers = #stablehlo.dot< -- lhs_batching_dimensions = [0], -- rhs_batching_dimensions = [0], -- lhs_contracting_dimensions = [1], -- rhs_contracting_dimensions = [1] -- > -- } : (tensor<2x3x4x!quant.uniform>, tensor<2x3x5x!quant.uniform>) -> tensor<2x4x5x!quant.uniform> -- func.return %0 : tensor<2x4x5x!quant.uniform> --} -- --// ----- -- --func.func @dot_general_c19(%arg0: tensor<2x3x4x!quant.uniform>, %arg1: tensor<2x3x5x!quant.uniform>) -> tensor<2x4x5x!quant.uniform> { -- // expected-error@+1 {{mismatched rhs and result quantization granularity}} -- %0 = "stablehlo.dot_general"(%arg0, %arg1) { -- dot_dimension_numbers = #stablehlo.dot< -- lhs_batching_dimensions = [0], -- rhs_batching_dimensions = [0], -- lhs_contracting_dimensions = [1], -- rhs_contracting_dimensions = [1] -- > -- } : (tensor<2x3x4x!quant.uniform>, tensor<2x3x5x!quant.uniform>) -> tensor<2x4x5x!quant.uniform> -- func.return %0 : tensor<2x4x5x!quant.uniform> --} -- --// ----- -- --func.func @dot_general_c20(%arg0: tensor<2x3x4xf32>, %arg1: tensor<2x3x5x!quant.uniform>) -> tensor<2x4x5xf32> { -- // expected-error@+1 {{mismatched rhs quantization expressed type and lhs and result element type}} -- %0 = "stablehlo.dot_general"(%arg0, %arg1) { -- dot_dimension_numbers = #stablehlo.dot< -- lhs_batching_dimensions = [0], -- rhs_batching_dimensions = [0], -- lhs_contracting_dimensions = [1], -- rhs_contracting_dimensions = [1] -- > -- } : (tensor<2x3x4xf32>, tensor<2x3x5x!quant.uniform>) -> tensor<2x4x5xf32> -- func.return %0 : tensor<2x4x5xf32> 
--} -- --// ----- -- - func.func @quantized_element_type_c8(%arg0: tensor<1x2x!quant.uniform:f32, 1.0:300>>) { - // expected-error-re@+1 {{operand #0 must be ranked tensor of {{.*}} 4/8/16/32-bit uniform quantized signed integer or 4/8/16/32-bit uniform quantized unsigned integer or 4/8/16/32-bit uniform quantized per axis signed integer or 4/8/16/32-bit uniform quantized per axis unsigned integer values, but got 'tensor<1x2x!quant.uniform>'}} - %0 = stablehlo.add %arg0, %arg0 : tensor<1x2x!quant.uniform:f32, 1.0:300>> -diff --ruN a/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_10_0.mlir b/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_10_0.mlir ---- stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_10_0.mlir -+++ stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_10_0.mlir -@@ -13,6 +13,7 @@ - // ============ ATTRIBUTES ============ - - // CHECK-LABEL: "attr_comparison_direction_eq" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_eq(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -22,6 +23,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_ne" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_ne(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -31,6 +33,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_ge" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_ge(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -40,6 +43,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_gt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_gt(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -49,6 +53,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_le" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_le(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -58,6 +63,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_lt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_lt(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -67,6 +73,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_notype" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_notype(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo -@@ -76,6 +83,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_float" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_float(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -86,6 +94,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_totalorder" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_totalorder(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { 
- comparison_direction = #stablehlo, -@@ -96,6 +105,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_signed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_signed(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -106,6 +116,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_unsigned" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_unsigned(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -118,6 +129,7 @@ - // ConvDimensionNumbers aka #stablehlo.conv is covered below. - - // CHECK-LABEL: "attr_custom_call_api_version_unspecified" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_unspecified(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -128,6 +140,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_original" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_original(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -138,6 +151,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_status_returning" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_status_returning(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -148,6 +162,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_status_returning_unified" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_status_returning_unified(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -160,6 +175,7 @@ - // DotDimensionNumbers aka #stablehlo.dot is covered below. - - // CHECK-LABEL: "attr_fft_type_fft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -170,6 +186,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_ifft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_ifft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -180,6 +197,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_rfft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_rfft(%arg0: tensor<16xf32>) -> tensor<9xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -190,6 +208,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_irfft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_irfft(%arg0: tensor<9xcomplex>) -> tensor<16xf32> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -202,6 +221,7 @@ - // GatherDimensionNumbers aka #stablehlo.gather is covered below. 
- - // CHECK-LABEL: "attr_precision_config_default" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_default(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -210,6 +230,7 @@ - } - - // CHECK-LABEL: "attr_precision_config_high" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_high(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -219,6 +240,7 @@ - } - - // CHECK-LABEL: "attr_precision_config_highest" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_highest(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -228,6 +250,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_default" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_default(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -237,6 +260,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_three_fry" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_three_fry(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -246,6 +270,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_philox" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_philox(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -255,6 +280,7 @@ - } - - // CHECK-LABEL: "attr_rng_distribution_uniform" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @attr_rng_distribution_uniform(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { - %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) { - // CHECK: rng_distribution = #vhlo -@@ -264,6 +290,7 @@ - } - - // CHECK-LABEL: "attr_rng_distribution_normal" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @attr_rng_distribution_normal(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { - %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) { - // CHECK: rng_distribution = #vhlo -@@ -275,6 +302,7 @@ - // ScatterDimensionNumbers aka #stablehlo.scatter is covered below. 
- - // CHECK-LABEL: "attr_transpose_no_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_no_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -287,6 +315,7 @@ - } - - // CHECK-LABEL: "attr_transpose_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -299,6 +328,7 @@ - } - - // CHECK-LABEL: "attr_transpose_adjoint" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_adjoint(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -313,10 +343,9 @@ - // TypeExtensionsAttr aka #stablehlo.type_extensions is covered below. - - // CHECK-LABEL: "attr_type_extensions_bounds" --func.func @attr_type_extensions_bounds( -- %arg0: tensor>) -- -> tensor> { -- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1>) -> () -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) -+func.func @attr_type_extensions_bounds(%arg0: tensor>) -> tensor> { -+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> () - func.return %arg0 : tensor> - } - -@@ -324,8 +353,9 @@ - // ============ DEFAULTS ============ - - // CHECK-LABEL: "default_all_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.all_gather_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{ - // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64> - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -339,8 +369,9 @@ - } - - // CHECK-LABEL: "default_all_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_reduce(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.all_reduce_v1"(%arg0) -+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]]) - // CHECK-SAME: <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -362,8 +393,9 @@ - } - - // CHECK-LABEL: "default_all_to_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> { -- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>, -@@ -380,8 +412,9 @@ - } - - // CHECK-LABEL: "default_cholesky" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> { -- // CHECK: "vhlo.cholesky_v1"(%arg0) <{ -+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{ - // CHECK-SAME: lower = #vhlo.bool_v1 - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1> - %0 = "stablehlo.cholesky"(%arg0) : (tensor<1x16x16xf32>) -> tensor<1x16x16xf32> -@@ -389,8 +422,9 @@ - } - - // CHECK-LABEL: "default_collective_permute" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: 
"vhlo.collective_permute_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -401,8 +435,9 @@ - } - - // CHECK-LABEL: "default_compare" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_compare(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: compare_type = #vhlo, - // CHECK-SAME: comparison_direction = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -413,8 +448,9 @@ - } - - // CHECK-LABEL: "default_convolution" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x6x6x16xf32> { -- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -442,8 +478,9 @@ - } - - // CHECK-LABEL: "default_custom_call" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_custom_call(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.custom_call_v1"(%arg0) <{ -+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{ - // CHECK-SAME: api_version = #vhlo, - // CHECK-SAME: backend_config = #vhlo.string_v1<"">, - // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">, -@@ -460,8 +497,9 @@ - } - - // CHECK-LABEL: "default_dot_general" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> { -- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, -@@ -480,8 +518,9 @@ - } - - // CHECK-LABEL: "default_dot" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.dot"(%arg0, %arg1) : (tensor<8x16xf32>, tensor<16x8xf32>) -> tensor<8x8xf32> -@@ -489,8 +528,9 @@ - } - - // CHECK-LABEL: "default_dynamic_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>>, - // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : 
tensor<0xi64>> -@@ -502,8 +542,9 @@ - } - - // CHECK-LABEL: "default_dynamic_conv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> { -- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -531,8 +572,9 @@ - } - - // CHECK-LABEL: "default_dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> { -- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -558,15 +600,16 @@ - // CHECK-SAME: sym_name = #vhlo.string_v1<"default_func">, - // CHECK-SAME: sym_visibility = #vhlo.string_v1<""> - // CHECK-SAME: }> ({ -- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1): -- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1): -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : () -> () - func.return %arg0 : tensor - } - - // CHECK-LABEL: "dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> { -- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -587,8 +630,9 @@ - } - - // CHECK-LABEL: "default_infeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.infeed_v1"(%arg0) <{ -+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{ - // CHECK-SAME: infeed_config = #vhlo.string_v1<"">, - // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[]> - // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) -@@ -597,8 +641,9 @@ - } - - // CHECK-LABEL: "default_outfeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: outfeed_config = #vhlo.string_v1<""> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.outfeed"(%arg0, %arg1) : (tensor, !stablehlo.token) -> !stablehlo.token -@@ -606,8 +651,9 @@ - } - - // CHECK-LABEL: "default_recv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.recv_v1"(%arg0) <{ -+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // 
CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -619,8 +665,9 @@ - } - - // CHECK-LABEL: "default_send" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -632,8 +679,9 @@ - } - - // CHECK-LABEL: "default_reduce_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64> -@@ -655,8 +703,9 @@ - } - - // CHECK-LABEL: "default_reduce_window" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x16x30x7xf32> { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, -@@ -678,8 +727,9 @@ - } - - // CHECK-LABEL: "default_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> { -- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, - // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>, -@@ -707,8 +757,9 @@ - } - - // CHECK-LABEL: "default_select_and_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<10x23x23x64xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> { -- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>> -@@ -736,8 +787,9 @@ - } - - // CHECK-LABEL: "default_sort" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.sort_v1"(%arg0) <{ -+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<-1 : i64> - // CHECK-SAME: is_stable = #vhlo.bool_v1 - // CHECK-SAME: }> ({ -@@ -756,29 +808,33 @@ - // ============ OPS ============ - - // CHECK-LABEL: "op_abs" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_abs(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : 
(!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_add" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_add(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_after_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_after_all(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.after_all_v1"(%arg0) : (!vhlo.token_v1) -> !vhlo.token_v1 -+ // CHECK: "vhlo.after_all_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.after_all"(%arg0) : (!stablehlo.token) -> !stablehlo.token - func.return %0 : !stablehlo.token - } - - // CHECK-LABEL: "op_all_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.all_gather_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{ - // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64> - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -794,8 +850,9 @@ - } - - // CHECK-LABEL: "op_all_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_reduce(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.all_reduce_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: use_global_device_ids = #vhlo.bool_v1 -@@ -817,8 +874,9 @@ - } - - // CHECK-LABEL: "op_all_to_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> { -- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>, -@@ -836,22 +894,25 @@ - } - - // CHECK-LABEL: "op_and" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_and(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_atan2" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_atan2(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.atan2_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.atan2_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.atan2"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_batch_norm_grad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_batch_norm_grad(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, 
%arg3: tensor<16xf32>, %arg4: tensor<16x16x16x16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { -- // CHECK: "vhlo.batch_norm_grad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ -+ // CHECK: "vhlo.batch_norm_grad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -@@ -863,8 +924,9 @@ - } - - // CHECK-LABEL: "op_batch_norm_inference" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_batch_norm_inference(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16xf32>) -> tensor<16x16x16x16xf32> { -- // CHECK: "vhlo.batch_norm_inference_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ -+ // CHECK: "vhlo.batch_norm_inference_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1> -@@ -876,8 +938,9 @@ - } - - // CHECK-LABEL: "op_batch_norm_training" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_batch_norm_training(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { -- // CHECK: "vhlo.batch_norm_training_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.batch_norm_training_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -@@ -889,15 +952,17 @@ - } - - // CHECK-LABEL: "op_bitcast_convert" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_bitcast_convert(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.bitcast_convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.bitcast_convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.bitcast_convert"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_broadcast_in_dim(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.broadcast_in_dim_v1"(%arg0) <{ -+ // CHECK: "vhlo.broadcast_in_dim_v1"(%[[ARG0]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1> - %0 = "stablehlo.broadcast_in_dim"(%arg0) { -@@ -907,8 +972,9 @@ - } - - // CHECK-LABEL: "op_broadcast" -+// CHECK-NEXT: 
(%[[ARG0:.*]]: {{.*}}) - func.func @op_broadcast(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.broadcast_v1"(%arg0) <{ -+ // CHECK: "vhlo.broadcast_v1"(%[[ARG0]]) <{ - // CHECK-SAME: broadcast_sizes = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1> - %0 = "stablehlo.broadcast"(%arg0) { -@@ -918,9 +984,10 @@ - } - - // CHECK-LABEL: "op_case" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_case(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.case_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.case"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -929,22 +996,25 @@ - } - - // CHECK-LABEL: "op_cbrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cbrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cbrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.cbrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cbrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_ceil" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_ceil(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.ceil_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.ceil_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.ceil"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_cholesky" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> { -- // CHECK: "vhlo.cholesky_v1"(%arg0) <{ -+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{ - // CHECK-SAME: lower = #vhlo.bool_v1 - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1> - %0 = "stablehlo.cholesky"(%arg0) { -@@ -954,22 +1024,25 @@ - } - - // CHECK-LABEL: "op_clamp" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_clamp(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.clamp_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.clamp_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.clamp"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_count_leading_zeros" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_count_leading_zeros(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.count_leading_zeros_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.count_leading_zeros_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.count_leading_zeros"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_collective_permute" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: "vhlo.collective_permute_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>> - // CHECK-SAME: }> : 
(!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -981,8 +1054,9 @@ - } - - // CHECK-LABEL: "op_compare" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_compare(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: compare_type = #vhlo, - // CHECK-SAME: comparison_direction = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -994,15 +1068,17 @@ - } - - // CHECK-LABEL: "op_complex" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_complex(%arg0: tensor, %arg1: tensor) -> tensor> { -- // CHECK: "vhlo.complex_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.complex_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1> - %0 = "stablehlo.complex"(%arg0, %arg1) : (tensor, tensor) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "op_concatenate" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_concatenate(%arg0: tensor<8xf32>, %arg1: tensor<8xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.concatenate_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.concatenate_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x!vhlo.f32_v1>, !vhlo.tensor_v1<8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.concatenate"(%arg0, %arg1) { -@@ -1012,6 +1088,7 @@ - } - - // CHECK-LABEL: "op_constant" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_constant(%arg0: tensor) -> tensor { - // CHECK: "vhlo.constant_v1"() <{ - // CHECK-SAME: value = #vhlo.tensor_v1 : tensor> -@@ -1023,15 +1100,17 @@ - } - - // CHECK-LABEL: "op_convert" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_convert(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.convert"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_convolution" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x7x7x16xf32> { -- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -1065,8 +1144,9 @@ - } - - // CHECK-LABEL: "op_cosine" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cosine(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cosine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.cosine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cosine"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } -@@ -1079,8 +1159,9 @@ - } - - // CHECK-LABEL: "op_cross_replica_sum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cross_replica_sum(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cross-replica-sum_v1"(%arg0) <{ -+ // CHECK: "vhlo.cross-replica-sum_v1"(%[[ARG0]]) <{ - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = 
"stablehlo.cross-replica-sum"(%arg0) { -@@ -1090,8 +1171,9 @@ - } - - // CHECK-LABEL: "op_custom_call" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_custom_call(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.custom_call_v1"(%arg0) <{ -+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{ - // CHECK-SAME: api_version = #vhlo, - // CHECK-SAME: backend_config = #vhlo.string_v1<"\08\03\1A\02">, - // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">, -@@ -1122,15 +1204,17 @@ - } - - // CHECK-LABEL: "op_divide" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_divide(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.divide_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.divide_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.divide"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dot_general" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> { -- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, -@@ -1150,8 +1234,9 @@ - } - - // CHECK-LABEL: "op_dot" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.dot"(%arg0, %arg1) { -@@ -1161,8 +1246,9 @@ - } - - // CHECK-LABEL: "op_dynamic_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1176,8 +1262,9 @@ - } - - // CHECK-LABEL: "op_dynamic_conv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> { -- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -1211,8 +1298,9 @@ - } - - // CHECK-LABEL: "op_dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> { -- // CHECK: 
"vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -1232,8 +1320,9 @@ - } - - // CHECK-LABEL: "op_dynamic_iota" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_dynamic_iota(%arg0: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_iota_v1"(%arg0) <{ -+ // CHECK: "vhlo.dynamic_iota_v1"(%[[ARG0]]) <{ - // CHECK-SAME: iota_dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_iota"(%arg0) { -@@ -1243,22 +1332,25 @@ - } - - // CHECK-LABEL: "op_dynamic_pad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_dynamic_pad(%arg0: tensor, %arg1: tensor, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>, %arg4: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_pad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.dynamic_pad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_pad"(%arg0, %arg1, %arg2, %arg3, %arg4) : (tensor, tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dynamic_reshape" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_reshape(%arg0: tensor<16xf32>, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_reshape_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.dynamic_reshape_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_reshape"(%arg0, %arg1) : (tensor<16xf32>, tensor<2xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dynamic_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_slice(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor<4xf32> { -- // CHECK: "vhlo.dynamic_slice_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_slice_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: slice_sizes = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<4x!vhlo.f32_v1> - %0 = "stablehlo.dynamic_slice"(%arg0, %arg1) { -@@ -1268,15 +1360,17 @@ - } - - // CHECK-LABEL: "op_dynamic_update_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_update_slice(%arg0: tensor<16xf32>, %arg1: tensor<4xf32>, %arg2: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.dynamic_update_slice_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> -+ // CHECK: "vhlo.dynamic_update_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> 
!vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.dynamic_update_slice"(%arg0, %arg1, %arg2) : (tensor<16xf32>, tensor<4xf32>, tensor) -> tensor<16xf32> - func.return %0 : tensor<16xf32> - } - - // CHECK-LABEL: "op_einsum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_einsum(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.einsum_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.einsum_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab,bc->ac"> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.einsum"(%arg0, %arg1) { -@@ -1286,22 +1380,25 @@ - } - - // CHECK-LABEL: "op_exponential_minus_one" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_exponential_minus_one(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.exponential_minus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.exponential_minus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.exponential_minus_one"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_exponential" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_exponential(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.exponential_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.exponential_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.exponential"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_fft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { -- // CHECK: "vhlo.fft_v1"(%arg0) <{ -+ // CHECK: "vhlo.fft_v1"(%[[ARG0]]) <{ - // CHECK-SAME: fft_length = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: fft_type = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.complex_v1>) -> !vhlo.tensor_v1<16x!vhlo.complex_v1> -@@ -1313,8 +1410,9 @@ - } - - // CHECK-LABEL: "op_floor" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_floor(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.floor_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.floor_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.floor"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } -@@ -1327,16 +1425,17 @@ - // CHECK-SAME: sym_name = #vhlo.string_v1<"op_func">, - // CHECK-SAME: sym_visibility = #vhlo.string_v1<"private"> - // CHECK-SAME: }> ({ -- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1): -- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1): -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : () -> () - - func.return %arg0 : tensor - } - - // CHECK-LABEL: "op_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> { -- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -1358,8 +1457,9 @@ - } - - // CHECK-LABEL: "op_get_dimension_size" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_get_dimension_size(%arg0: tensor) -> tensor { -- // CHECK: 
"vhlo.get_dimension_size_v1"(%arg0) <{ -+ // CHECK: "vhlo.get_dimension_size_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.get_dimension_size"(%arg0) { -@@ -1369,8 +1469,9 @@ - } - - // CHECK-LABEL: "op_get_tuple_element" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_get_tuple_element(%arg0: tuple, tensor>) -> tensor { -- // CHECK: "vhlo.get_tuple_element_v1"(%arg0) <{ -+ // CHECK: "vhlo.get_tuple_element_v1"(%[[ARG0]]) <{ - // CHECK-SAME: index = #vhlo.integer_v1<0 : i32> - // CHECK-SAME: }> : (!vhlo.tuple_v1, !vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.get_tuple_element"(%arg0) { -@@ -1380,11 +1481,12 @@ - } - - // CHECK-LABEL: "op_if" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_if(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.if_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.if_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }, { -- // CHECK-NEXT: "vhlo.return_v1"(%arg2) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG2]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.if"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -1395,15 +1497,17 @@ - } - - // CHECK-LABEL: "op_imag" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_imag(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.imag_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.imag_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.imag"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_infeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.infeed_v1"(%arg0) <{ -+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{ - // CHECK-SAME: infeed_config = #vhlo.string_v1<"foo">, - // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[#vhlo.array_v1<[]>]> - // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) -@@ -1426,36 +1530,41 @@ - } - - // CHECK-LABEL: "op_is_finite" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_is_finite(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.is_finite_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.is_finite_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.is_finite"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_log" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_log(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.log_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.log_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.log"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_log_plus_one" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_log_plus_one(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.log_plus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.log_plus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.log_plus_one"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_logistic" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_logistic(%arg0: tensor) -> tensor { -- // CHECK: 
"vhlo.logistic_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.logistic_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.logistic"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_map" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_map(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.map_v1"(%arg0) <{ -+ // CHECK: "vhlo.map_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> ({ - // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1): -@@ -1473,57 +1582,65 @@ - } - - // CHECK-LABEL: "op_maximum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_maximum(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.maximum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.maximum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.maximum"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_minimum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_minimum(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.minimum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.minimum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.minimum"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_multiply" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_multiply(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.multiply_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.multiply_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.multiply"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_negate" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_negate(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.negate_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.negate_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.negate"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_not" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_not(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.not_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.not_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.not"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_optimization_barrier" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_optimization_barrier(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.optimization_barrier_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.optimization_barrier_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.optimization_barrier"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_or" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_or(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.or_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.or_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = 
"stablehlo.or"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_outfeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: outfeed_config = #vhlo.string_v1<"foo"> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.outfeed"(%arg0, %arg1) { -@@ -1533,8 +1650,9 @@ - } - - // CHECK-LABEL: "op_pad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_pad(%arg0: tensor<8xf32>, %arg1: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.pad_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.pad_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: edge_padding_high = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: edge_padding_low = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: interior_padding = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1548,36 +1666,41 @@ - } - - // CHECK-LABEL: "op_popcnt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_popcnt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.popcnt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.popcnt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.popcnt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_power" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_power(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.power_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.power_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.power"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_real_dynamic_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}) - func.func @op_real_dynamic_slice(%arg0: tensor, %arg1: tensor<1xindex>, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.real_dynamic_slice_v1"(%arg0, %arg1, %arg2, %arg3) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.real_dynamic_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.real_dynamic_slice"(%arg0, %arg1, %arg2, %arg3) : (tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_real" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_real(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.real_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.real_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.real"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_recv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.recv_v1"(%arg0) <{ -+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<3 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ 
-1590,8 +1713,9 @@
- }
-
- // CHECK-LABEL: "op_reduce"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_reduce(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.reduce_v1"(%arg0, %arg1)
-+ // CHECK: "vhlo.reduce_v1"(%[[ARG0]], %[[ARG1]])
- // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1):
- // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> ()
- // CHECK: }) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-@@ -1606,8 +1730,9 @@
- }
-
- // CHECK-LABEL: "op_reduce_precision"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_reduce_precision(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.reduce_precision_v1"(%arg0) <{
-+ // CHECK: "vhlo.reduce_precision_v1"(%[[ARG0]]) <{
- // CHECK-SAME: exponent_bits = #vhlo.integer_v1<8 : i32>
- // CHECK-SAME: mantissa_bits = #vhlo.integer_v1<10 : i32>
- // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-@@ -1619,8 +1744,9 @@
- }
-
- // CHECK-LABEL: "op_reduce_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{
-+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
- // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64>
-@@ -1644,8 +1770,9 @@
- }
-
- // CHECK-LABEL: "op_reduce_window"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x9x16x7xf32> {
-- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>,
- // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>,
- // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>,
-@@ -1671,8 +1798,9 @@
- }
-
- // CHECK-LABEL: "op_remainder"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_remainder(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.remainder_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.remainder_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.remainder"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-@@ -1692,16 +1820,18 @@
- }
-
- // CHECK-LABEL: "op_reshape"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_reshape(%arg0: tensor<16xf32>) -> tensor<4x4xf32> {
-- // CHECK: "vhlo.reshape_v1"(%arg0) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1>
-+ // CHECK: "vhlo.reshape_v1"(%[[ARG0]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1>
- %0 = "stablehlo.reshape"(%arg0) : (tensor<16xf32>) -> tensor<4x4xf32>
- func.return %0 : tensor<4x4xf32>
- }
-
- // CHECK-LABEL: "op_return"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_return(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.case_v1"(%arg0) ({
-- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> ()
-+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({
-+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.case"(%arg0) ({
- "stablehlo.return"(%arg1) : (tensor) -> ()
-@@ -1710,8 +1840,9 @@
- }
-
- // CHECK-LABEL: "op_reverse"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_reverse(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.reverse_v1"(%arg0) <{
-+ // CHECK: "vhlo.reverse_v1"(%[[ARG0]]) <{
- // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
- %0 = "stablehlo.reverse"(%arg0) {
-@@ -1721,8 +1852,9 @@
- }
-
- // CHECK-LABEL: "op_rng_bit_generator"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_rng_bit_generator(%arg0: tensor) -> (tensor, tensor) {
-- // CHECK: "vhlo.rng_bit_generator_v1"(%arg0) <{
-+ // CHECK: "vhlo.rng_bit_generator_v1"(%[[ARG0]]) <{
- // CHECK-SAME: rng_algorithm = #vhlo
- // CHECK-SAME: }> : (!vhlo.tensor_v1) -> (!vhlo.tensor_v1, !vhlo.tensor_v1)
- %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
-@@ -1732,8 +1864,9 @@
- }
-
- // CHECK-LABEL: "op_rng"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_rng(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor {
-- // CHECK: "vhlo.rng_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.rng_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: rng_distribution = #vhlo
- // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<0x!vhlo.index_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) {
-@@ -1743,29 +1876,33 @@
- }
-
- // CHECK-LABEL: "op_round_nearest_afz"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_round_nearest_afz(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.round_nearest_afz_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.round_nearest_afz_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.round_nearest_afz"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_round_nearest_even"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_round_nearest_even(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.round_nearest_even_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.round_nearest_even_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.round_nearest_even"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_rsqrt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_rsqrt(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.rsqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.rsqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.rsqrt"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> {
-- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
- // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
-@@ -1795,8 +1932,9 @@
- }
-
- // CHECK-LABEL: "op_select_and_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<12x13x13x66xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> {
-- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>,
- // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>,
- // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>>
-@@ -1826,15 +1964,17 @@
- }
-
- // CHECK-LABEL: "op_select"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_select(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor {
-- // CHECK: "vhlo.select_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.select_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.select"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_send"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME: channel_type = #vhlo.integer_v1<2 : i64>,
- // CHECK-SAME: is_host_transfer = #vhlo.bool_v1
-@@ -1847,8 +1987,9 @@
- }
-
- // CHECK-LABEL: "op_set_dimension_size"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_set_dimension_size(%arg0: tensor, %arg1: tensor) -> tensor<16xf32> {
-- // CHECK: "vhlo.set_dimension_size_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.set_dimension_size_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
- %0 = "stablehlo.set_dimension_size"(%arg0, %arg1) {
-@@ -1858,43 +1999,49 @@
- }
-
- // CHECK-LABEL: "op_shift_left"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_shift_left(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.shift_left_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.shift_left_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.shift_left"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_shift_right_arithmetic"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_shift_right_arithmetic(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.shift_right_arithmetic_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.shift_right_arithmetic_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.shift_right_arithmetic"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_shift_right_logical"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_shift_right_logical(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.shift_right_logical_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.shift_right_logical_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.shift_right_logical"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_sign"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sign(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.sign_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.sign_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.sign"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_sine"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sine(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.sine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.sine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.sine"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_slice"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_slice(%arg0: tensor<16xf32>) -> tensor<4xf32> {
-- // CHECK: "vhlo.slice_v1"(%arg0) <{
-+ // CHECK: "vhlo.slice_v1"(%[[ARG0]]) <{
- // CHECK-SAME: limit_indices = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: start_indices = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: strides = #vhlo.tensor_v1 : tensor<1xi64>>
-@@ -1908,8 +2055,9 @@
- }
-
- // CHECK-LABEL: "op_sort"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.sort_v1"(%arg0) <{
-+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{
- // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: is_stable = #vhlo.bool_v1
- // CHECK-SAME: }> ({
-@@ -1929,29 +2077,33 @@
- }
-
- // CHECK-LABEL: "op_sqrt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sqrt(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.sqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.sqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.sqrt"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_subtract"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_subtract(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.subtract_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.subtract_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.subtract"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_tanh"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_tanh(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.tanh_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.tanh_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.tanh"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_torch_index_select"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_torch_index_select(%arg0: tensor<5x1x5xf32>, %arg1: tensor<2xi32>) -> tensor<2x1x5xf32> {
-- // CHECK: "vhlo.torch_index_select_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.torch_index_select_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: batch_dims = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: dim = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<5x1x5x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.i32_v1>) -> !vhlo.tensor_v1<2x1x5x!vhlo.f32_v1>
-@@ -1963,8 +2115,9 @@
- }
-
- // CHECK-LABEL: "op_trace"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_trace(%arg0: tensor) {
-- // CHECK: "vhlo.trace_v1"(%arg0) <{
-+ // CHECK: "vhlo.trace_v1"(%[[ARG0]]) <{
- // CHECK-SAME: tag = #vhlo.string_v1<"foo">
- // CHECK-SAME: }> : (!vhlo.tensor_v1) -> ()
- "stablehlo.trace"(%arg0) {
-@@ -1974,8 +2127,9 @@
- }
-
- // CHECK-LABEL: "op_transpose"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_transpose(%arg0: tensor<16x8xf32>) -> tensor<8x16xf32> {
-- // CHECK: "vhlo.transpose_v1"(%arg0) <{
-+ // CHECK: "vhlo.transpose_v1"(%[[ARG0]]) <{
- // CHECK-SAME: permutation = #vhlo.tensor_v1 : tensor<2xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x16x!vhlo.f32_v1>
- %0 = "stablehlo.transpose"(%arg0) {
-@@ -1985,8 +2139,9 @@
- }
-
- // CHECK-LABEL: "op_triangular_solve"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_triangular_solve(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
-- // CHECK: "vhlo.triangular_solve_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.triangular_solve_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: left_side = #vhlo.bool_v1,
- // CHECK-SAME: lower = #vhlo.bool_v1,
- // CHECK-SAME: transpose_a = #vhlo,
-@@ -2002,15 +2157,17 @@
- }
-
- // CHECK-LABEL: "op_tuple"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_tuple(%arg0: tensor) -> tuple> {
-- // CHECK: "vhlo.tuple_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1>
-+ // CHECK: "vhlo.tuple_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1>
- %0 = "stablehlo.tuple"(%arg0) : (tensor) -> tuple>
- func.return %0 : tuple>
- }
-
- // CHECK-LABEL: "op_unary_einsum"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_unary_einsum(%arg0: tensor<8x16xf32>) -> tensor<8xf32> {
-- // CHECK: "vhlo.unary_einsum_v1"(%arg0) <{
-+ // CHECK: "vhlo.unary_einsum_v1"(%[[ARG0]]) <{
- // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab->a">
- // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x!vhlo.f32_v1>
- %0 = "stablehlo.unary_einsum"(%arg0) {
-@@ -2020,22 +2177,25 @@
- }
-
- // CHECK-LABEL: "op_uniform_dequantize"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_uniform_dequantize(%arg0: tensor>) -> tensor {
-- // CHECK: "vhlo.uniform_dequantize_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.uniform_dequantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.uniform_dequantize"(%arg0) : (tensor>) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_uniform_quantize"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_uniform_quantize(%arg0: tensor) -> tensor> {
-- // CHECK: "vhlo.uniform_quantize_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1>
-+ // CHECK: "vhlo.uniform_quantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1>
- %0 = "stablehlo.uniform_quantize"(%arg0) : (tensor) -> tensor>
- func.return %0 : tensor>
- }
-
- // CHECK-LABEL: "op_while"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_while(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.while_v1"(%arg0) ({
-+ // CHECK: "vhlo.while_v1"(%[[ARG0]]) ({
- // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1):
- // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }, {
-@@ -2053,8 +2213,9 @@
- }
-
- // CHECK-LABEL: "op_xor"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_xor(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.xor_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.xor_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.xor"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-@@ -2062,183 +2223,209 @@
-
- // ============ TYPES ============
-
- // CHECK-LABEL: "type_i1"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i1(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i4"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i4(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i8"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i8(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i16(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i32(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i64(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_ui4"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui4(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_ui8"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui8(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_ui16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui16(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_ui32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui32(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_ui64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui64(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f8E4M3FN"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E4M3FN(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f8E5M2"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E5M2(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f8E4M3FNUZ"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E4M3FNUZ(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f8E5M2FNUZ"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E5M2FNUZ(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_bf16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_bf16(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f16(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f32(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f64(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_complex_f32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_complex_f32(%arg0: tensor>, %arg1: tensor>) -> tensor> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor>
- func.return %0 : tensor>
- }
-
- // CHECK-LABEL: "type_complex_f64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_complex_f64(%arg0: tensor>, %arg1: tensor>) -> tensor> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor>
- func.return %0 : tensor>
- }
-
- // CHECK-LABEL: "type_dynamism_ranked"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_dynamism_ranked(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_quantization"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_quantization(%arg0: tensor>, %arg1: tensor>) -> tensor> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor>
- func.return %0 : tensor>
- }
-
- // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>>
- // CHECK-LABEL: "type_token_callee"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_token_callee(%arg0: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.token_v1) -> ()
-+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> ()
- return %arg0 : !stablehlo.token
- }
-
- // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>>
- // CHECK-LABEL: "type_token_caller"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_token_caller(%arg0: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.call_v1"(%arg0) <{callee = #vhlo.string_v1<"type_token_callee">}
-+ // CHECK: "vhlo.call_v1"(%[[ARG0]]) <{callee = #vhlo.string_v1<"type_token_callee">}
- // CHECK-SAME: (!vhlo.token_v1) -> !vhlo.token_v1
- %0 = func.call @type_token_callee(%arg0) : (!stablehlo.token) -> !stablehlo.token
- return %0 : !stablehlo.token
- }
-
- // CHECK-LABEL: "type_tuple"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_tuple(%arg0: tuple>) -> tuple {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo"
-diff --ruN a/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_11_0.mlir b/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_11_0.mlir
---- stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_11_0.mlir
-+++ stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_11_0.mlir
-@@ -13,6 +13,7 @@
- // ============ ATTRIBUTES ============
-
- // CHECK-LABEL: "attr_comparison_direction_eq"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_eq(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -22,6 +23,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_ne"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_ne(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -31,6 +33,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_ge"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_ge(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -40,6 +43,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_gt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_gt(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -49,6 +53,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_le"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_le(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -58,6 +63,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_lt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_lt(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -67,6 +73,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_notype"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_notype(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo
-@@ -76,6 +83,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_float"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_float(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo,
-@@ -86,6 +94,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_totalorder"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_totalorder(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo,
-@@ -96,6 +105,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_signed"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_signed(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo,
-@@ -106,6 +116,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_unsigned"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_unsigned(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo,
-@@ -118,6 +129,7 @@
- // ConvDimensionNumbers aka #stablehlo.conv is covered below.
-
- // CHECK-LABEL: "attr_custom_call_api_version_unspecified"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_custom_call_api_version_unspecified(%arg0: tensor) -> tensor {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo",
-@@ -128,6 +140,7 @@
- }
-
- // CHECK-LABEL: "attr_custom_call_api_version_original"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_custom_call_api_version_original(%arg0: tensor) -> tensor {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo",
-@@ -138,6 +151,7 @@
- }
-
- // CHECK-LABEL: "attr_custom_call_api_version_status_returning"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_custom_call_api_version_status_returning(%arg0: tensor) -> tensor {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo",
-@@ -148,6 +162,7 @@
- }
-
- // CHECK-LABEL: "attr_custom_call_api_version_status_returning_unified"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_custom_call_api_version_status_returning_unified(%arg0: tensor) -> tensor {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo",
-@@ -160,6 +175,7 @@
- // DotDimensionNumbers aka #stablehlo.dot is covered below.
-
- // CHECK-LABEL: "attr_fft_type_fft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -170,6 +186,7 @@
- }
-
- // CHECK-LABEL: "attr_fft_type_ifft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_ifft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -180,6 +197,7 @@
- }
-
- // CHECK-LABEL: "attr_fft_type_rfft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_rfft(%arg0: tensor<16xf32>) -> tensor<9xcomplex> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -190,6 +208,7 @@
- }
-
- // CHECK-LABEL: "attr_fft_type_irfft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_irfft(%arg0: tensor<9xcomplex>) -> tensor<16xf32> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -202,6 +221,7 @@
- // GatherDimensionNumbers aka #stablehlo.gather is covered below.
-
- // CHECK-LABEL: "attr_precision_config_default"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_precision_config_default(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
- %0 = "stablehlo.dot"(%arg0, %arg1) {
- // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
-@@ -210,6 +230,7 @@
- }
-
- // CHECK-LABEL: "attr_precision_config_high"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_precision_config_high(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
- %0 = "stablehlo.dot"(%arg0, %arg1) {
- // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
-@@ -219,6 +240,7 @@
- }
-
- // CHECK-LABEL: "attr_precision_config_highest"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_precision_config_highest(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
- %0 = "stablehlo.dot"(%arg0, %arg1) {
- // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
-@@ -228,6 +250,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_algorithm_default"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_rng_algorithm_default(%arg0: tensor) -> (tensor, tensor) {
- %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
- // CHECK: rng_algorithm = #vhlo
-@@ -237,6 +260,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_algorithm_three_fry"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_rng_algorithm_three_fry(%arg0: tensor) -> (tensor, tensor) {
- %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
- // CHECK: rng_algorithm = #vhlo
-@@ -246,6 +270,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_algorithm_philox"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_rng_algorithm_philox(%arg0: tensor) -> (tensor, tensor) {
- %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
- // CHECK: rng_algorithm = #vhlo
-@@ -255,6 +280,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_distribution_uniform"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @attr_rng_distribution_uniform(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor {
- %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) {
- // CHECK: rng_distribution = #vhlo
-@@ -264,6 +290,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_distribution_normal"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @attr_rng_distribution_normal(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor {
- %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) {
- // CHECK: rng_distribution = #vhlo
-@@ -275,6 +302,7 @@
- // ScatterDimensionNumbers aka #stablehlo.scatter is covered below.
-
- // CHECK-LABEL: "attr_transpose_no_transpose"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_transpose_no_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
- %0 = "stablehlo.triangular_solve"(%arg0, %arg1) {
- left_side = true,
-@@ -287,6 +315,7 @@
- }
-
- // CHECK-LABEL: "attr_transpose_transpose"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_transpose_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
- %0 = "stablehlo.triangular_solve"(%arg0, %arg1) {
- left_side = true,
-@@ -299,6 +328,7 @@
- }
-
- // CHECK-LABEL: "attr_transpose_adjoint"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_transpose_adjoint(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
- %0 = "stablehlo.triangular_solve"(%arg0, %arg1) {
- left_side = true,
-@@ -313,10 +343,9 @@
- // TypeExtensionsAttr aka #stablehlo.type_extensions is covered below.
-
- // CHECK-LABEL: "attr_type_extensions_bounds"
--func.func @attr_type_extensions_bounds(
-- %arg0: tensor>)
-- -> tensor> {
-- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1>) -> ()
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
-+func.func @attr_type_extensions_bounds(%arg0: tensor>) -> tensor> {
-+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> ()
- func.return %arg0 : tensor>
- }
-
-@@ -324,8 +353,9 @@
- // ============ DEFAULTS ============
-
- // CHECK-LABEL: "default_all_gather"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> {
-- // CHECK: "vhlo.all_gather_v1"(%arg0) <{
-+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{
- // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64>
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
-@@ -339,8 +369,9 @@
- }
-
- // CHECK-LABEL: "default_all_reduce"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_all_reduce(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.all_reduce_v1"(%arg0)
-+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]])
- // CHECK-SAME: <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
-@@ -362,8 +393,9 @@
- }
-
- // CHECK-LABEL: "default_all_to_all"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> {
-- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{
-+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>,
-@@ -380,8 +412,9 @@
- }
-
- // CHECK-LABEL: "default_cholesky"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> {
-- // CHECK: "vhlo.cholesky_v1"(%arg0) <{
-+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{
- // CHECK-SAME: lower = #vhlo.bool_v1
- // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>
- %0 = "stablehlo.cholesky"(%arg0) : (tensor<1x16x16xf32>) -> tensor<1x16x16xf32>
-@@ -389,8 +422,9 @@
- }
-
- // CHECK-LABEL: "default_collective_permute"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> {
-- // CHECK: "vhlo.collective_permute_v1"(%arg0) <{
-+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1>
-@@ -401,8 +435,9 @@
- }
-
- // CHECK-LABEL: "default_compare"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_compare(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: compare_type = #vhlo,
- // CHECK-SAME: comparison_direction = #vhlo
- // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-@@ -413,8 +448,9 @@
- }
-
- // CHECK-LABEL: "default_convolution"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x6x6x16xf32> {
-- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>,
-@@ -442,8 +478,9 @@
- }
-
- // CHECK-LABEL: "default_custom_call"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_custom_call(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.custom_call_v1"(%arg0) <{
-+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{
- // CHECK-SAME: api_version = #vhlo,
- // CHECK-SAME: backend_config = #vhlo.string_v1<"">,
- // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">,
-@@ -460,8 +497,9 @@
- }
-
- // CHECK-LABEL: "default_dot_general"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> {
-- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>,
-@@ -480,8 +518,9 @@
- }
-
- // CHECK-LABEL: "default_dot"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
-- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1>
- %0 = "stablehlo.dot"(%arg0, %arg1) : (tensor<8x16xf32>, tensor<16x8xf32>) -> tensor<8x8xf32>
-@@ -489,8 +528,9 @@
- }
-
- // CHECK-LABEL: "default_dynamic_broadcast_in_dim"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor {
-- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>,
- // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>>,
- // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>>
-@@ -502,8 +542,9 @@
- }
-
- // CHECK-LABEL: "default_dynamic_conv"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @default_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> {
-- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>,
-@@ -531,8 +572,9 @@
- }
-
- // CHECK-LABEL: "default_dynamic_gather"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @default_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> {
-- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
- // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>,
- // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
-@@ -558,15 +600,16 @@
- // CHECK-SAME: sym_name = #vhlo.string_v1<"default_func">,
- // CHECK-SAME: sym_visibility = #vhlo.string_v1<"">
- // CHECK-SAME: }> ({
-- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1):
-- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> ()
-+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1):
-+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }) : () -> ()
- func.return %arg0 : tensor
- }
-
- // CHECK-LABEL: "dynamic_gather"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> {
-- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
- // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>,
- // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
-@@ -587,8 +630,9 @@
- }
-
- // CHECK-LABEL: "default_infeed"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) {
-- // CHECK: "vhlo.infeed_v1"(%arg0) <{
-+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{
- // CHECK-SAME: infeed_config = #vhlo.string_v1<"">,
- // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[]>
- // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1)
-@@ -597,8 +641,9 @@
- }
-
- // CHECK-LABEL: "default_outfeed"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: outfeed_config = #vhlo.string_v1<"">
- // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1
- %0 = "stablehlo.outfeed"(%arg0, %arg1) : (tensor, !stablehlo.token) -> !stablehlo.token
-@@ -606,8 +651,9 @@
- }
-
- // CHECK-LABEL: "default_recv"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) {
-- // CHECK: "vhlo.recv_v1"(%arg0) <{
-+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: is_host_transfer = #vhlo.bool_v1
-@@ -619,8 +665,9 @@
- }
-
- // CHECK-LABEL: "default_send"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: is_host_transfer = #vhlo.bool_v1
-@@ -632,8 +679,9 @@
- }
-
- // CHECK-LABEL: "default_reduce_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{
-+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
- // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64>
-@@ -655,8 +703,9 @@
- }
-
- // CHECK-LABEL: "default_reduce_window"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x16x30x7xf32> {
-- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>,
- // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>,
- // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>,
-@@ -678,8 +727,9 @@
- }
-
- // CHECK-LABEL: "default_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @default_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> {
-- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
- // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
-@@ -707,8 +757,9 @@
- }
-
- // CHECK-LABEL: "default_select_and_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @default_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<10x23x23x64xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> {
-- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>,
- // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>,
- // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>>
-@@ -736,8 +787,9 @@
- }
-
- // CHECK-LABEL: "default_sort"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.sort_v1"(%arg0) <{
-+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{
- // CHECK-SAME: dimension = #vhlo.integer_v1<-1 : i64>
- // CHECK-SAME: is_stable = #vhlo.bool_v1
- // CHECK-SAME: }> ({
-@@ -756,29 +808,33 @@
-
- // ============ OPS ============
-
- // CHECK-LABEL: "op_abs"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_abs(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_add"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_add(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_after_all"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_after_all(%arg0: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.after_all_v1"(%arg0) : (!vhlo.token_v1) -> !vhlo.token_v1
-+ // CHECK: "vhlo.after_all_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> !vhlo.token_v1
- %0 = "stablehlo.after_all"(%arg0) : (!stablehlo.token) -> !stablehlo.token
- func.return %0 : !stablehlo.token
- }
-
- // CHECK-LABEL: "op_all_gather"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> {
-- // CHECK: "vhlo.all_gather_v1"(%arg0) <{
-+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{
- // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64>
- // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
-@@ -794,8 +850,9 @@
- }
-
- // CHECK-LABEL: "op_all_reduce"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_all_reduce(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.all_reduce_v1"(%arg0) <{
-+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
- // CHECK-SAME: use_global_device_ids = #vhlo.bool_v1
-@@ -817,8 +874,9 @@
- }
-
- // CHECK-LABEL: "op_all_to_all"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> {
-- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{
-+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>,
-@@ -836,22 +894,25 @@
- }
-
- // CHECK-LABEL: "op_and"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_and(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_atan2"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_atan2(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.atan2_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.atan2_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.atan2"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_batch_norm_grad"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}})
- func.func @op_batch_norm_grad(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16x16x16x16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) {
-- // CHECK: "vhlo.batch_norm_grad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{
-+ // CHECK: "vhlo.batch_norm_grad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{
- // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>,
- // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>)
-@@ -863,8 +924,9 @@
- }
-
- // CHECK-LABEL: "op_batch_norm_inference"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}})
- func.func @op_batch_norm_inference(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16xf32>) -> tensor<16x16x16x16xf32> {
-- // CHECK: "vhlo.batch_norm_inference_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{
-+ // CHECK: "vhlo.batch_norm_inference_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{
- // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>,
- // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>
-@@ -876,8 +938,9 @@
- }
-
- // CHECK-LABEL: "op_batch_norm_training"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_batch_norm_training(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) {
-- // CHECK: "vhlo.batch_norm_training_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.batch_norm_training_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>,
- // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>)
-@@ -889,15 +952,17 @@
- }
-
- // CHECK-LABEL: "op_bitcast_convert"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_bitcast_convert(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.bitcast_convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.bitcast_convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.bitcast_convert"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_broadcast_in_dim"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_broadcast_in_dim(%arg0: tensor<16xf32>) -> tensor<16x16xf32> {
-- // CHECK: "vhlo.broadcast_in_dim_v1"(%arg0) <{
-+ // CHECK: "vhlo.broadcast_in_dim_v1"(%[[ARG0]]) <{
- // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1>
- %0 = "stablehlo.broadcast_in_dim"(%arg0) {
-@@ -907,8 +972,9 @@
- }
-
- // CHECK-LABEL: "op_broadcast"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_broadcast(%arg0: tensor<16xf32>) -> tensor<16x16xf32> {
-- // CHECK: "vhlo.broadcast_v1"(%arg0) <{
-+ // CHECK: "vhlo.broadcast_v1"(%[[ARG0]]) <{
- // CHECK-SAME: broadcast_sizes = #vhlo.tensor_v1 : tensor<1xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1>
- %0 = "stablehlo.broadcast"(%arg0) {
-@@ -918,9 +984,10 @@
- }
-
- // CHECK-LABEL: "op_case"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_case(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.case_v1"(%arg0) ({
-- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> ()
-+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({
-+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.case"(%arg0) ({
- "stablehlo.return"(%arg1) : (tensor) -> ()
-@@ -929,22 +996,25 @@
- }
-
- // CHECK-LABEL: "op_cbrt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_cbrt(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.cbrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.cbrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.cbrt"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_ceil"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_ceil(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.ceil_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.ceil_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.ceil"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_cholesky"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> {
-- // CHECK: "vhlo.cholesky_v1"(%arg0) <{
-+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{
- // CHECK-SAME: lower = #vhlo.bool_v1
- // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>
- %0 = "stablehlo.cholesky"(%arg0) {
-@@ -954,22 +1024,25 @@
- }
-
- // CHECK-LABEL: "op_clamp"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_clamp(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor {
-- // CHECK: "vhlo.clamp_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.clamp_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.clamp"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_count_leading_zeros"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_count_leading_zeros(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.count_leading_zeros_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.count_leading_zeros_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.count_leading_zeros"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_collective_permute"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> {
-- // CHECK: "vhlo.collective_permute_v1"(%arg0) <{
-+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1>
-@@ -981,8 +1054,9 @@
- }
-
- // CHECK-LABEL: "op_compare"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_compare(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: compare_type = #vhlo,
- // CHECK-SAME: comparison_direction = #vhlo
- // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-@@ -994,15 +1068,17 @@
- }
-
- // CHECK-LABEL: "op_complex"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_complex(%arg0: tensor, %arg1: tensor) -> tensor> {
-- // CHECK: "vhlo.complex_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1>
-+ // CHECK: "vhlo.complex_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1>
- %0 = "stablehlo.complex"(%arg0, %arg1) : (tensor, tensor) -> tensor>
- func.return %0 : tensor>
- }
-
- // CHECK-LABEL: "op_concatenate"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_concatenate(%arg0: tensor<8xf32>, %arg1: tensor<8xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.concatenate_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.concatenate_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<8x!vhlo.f32_v1>, !vhlo.tensor_v1<8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
- %0 = "stablehlo.concatenate"(%arg0, %arg1) {
-@@ -1012,6 +1088,7 @@
- }
-
- // CHECK-LABEL: "op_constant"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_constant(%arg0: tensor) -> tensor {
- // CHECK: "vhlo.constant_v1"() <{
- // CHECK-SAME: value = #vhlo.tensor_v1 : tensor>
-@@ -1023,15 +1100,17 @@
- }
-
- // CHECK-LABEL: "op_convert"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_convert(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.convert"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_convolution"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x7x7x16xf32> {
-- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>,
-@@ -1065,8 +1144,9 @@
- }
-
- // CHECK-LABEL: "op_cosine"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_cosine(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.cosine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.cosine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.cosine"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-@@ -1079,8 +1159,9 @@
- }
-
- // CHECK-LABEL: "op_cross_replica_sum"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_cross_replica_sum(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.cross-replica-sum_v1"(%arg0) <{
-+ // CHECK: "vhlo.cross-replica-sum_v1"(%[[ARG0]]) <{
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.cross-replica-sum"(%arg0) {
-@@ -1090,8 +1171,9 @@
- }
-
- // CHECK-LABEL: "op_custom_call"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_custom_call(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.custom_call_v1"(%arg0) <{
-+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{
- // CHECK-SAME: api_version = #vhlo,
- // CHECK-SAME: backend_config = #vhlo.string_v1<"\08\03\1A\02">,
- // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">,
-@@ -1122,15 +1204,17 @@
- }
-
- // CHECK-LABEL: "op_divide"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_divide(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.divide_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.divide_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.divide"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_dot_general"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> {
-- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>,
-@@ -1150,8 +1234,9 @@
- }
-
- // CHECK-LABEL: "op_dot"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
-- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1>
- %0 = "stablehlo.dot"(%arg0, %arg1) {
-@@ -1161,8 +1246,9 @@
- }
-
- // CHECK-LABEL: "op_dynamic_broadcast_in_dim"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor {
-- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>,
- // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>
-@@ -1176,8 +1262,9 @@
- }
-
- // CHECK-LABEL: "op_dynamic_conv"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> {
-- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>,
-@@ -1211,8 +1298,9 @@
- }
-
- // CHECK-LABEL: "op_dynamic_gather"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> {
-- // CHECK: 
"vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -1232,8 +1320,9 @@ - } - - // CHECK-LABEL: "op_dynamic_iota" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_dynamic_iota(%arg0: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_iota_v1"(%arg0) <{ -+ // CHECK: "vhlo.dynamic_iota_v1"(%[[ARG0]]) <{ - // CHECK-SAME: iota_dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_iota"(%arg0) { -@@ -1243,22 +1332,25 @@ - } - - // CHECK-LABEL: "op_dynamic_pad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_dynamic_pad(%arg0: tensor, %arg1: tensor, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>, %arg4: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_pad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.dynamic_pad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_pad"(%arg0, %arg1, %arg2, %arg3, %arg4) : (tensor, tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dynamic_reshape" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_reshape(%arg0: tensor<16xf32>, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_reshape_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.dynamic_reshape_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_reshape"(%arg0, %arg1) : (tensor<16xf32>, tensor<2xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dynamic_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_slice(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor<4xf32> { -- // CHECK: "vhlo.dynamic_slice_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_slice_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: slice_sizes = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<4x!vhlo.f32_v1> - %0 = "stablehlo.dynamic_slice"(%arg0, %arg1) { -@@ -1268,15 +1360,17 @@ - } - - // CHECK-LABEL: "op_dynamic_update_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_update_slice(%arg0: tensor<16xf32>, %arg1: tensor<4xf32>, %arg2: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.dynamic_update_slice_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> -+ // CHECK: "vhlo.dynamic_update_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> 
!vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.dynamic_update_slice"(%arg0, %arg1, %arg2) : (tensor<16xf32>, tensor<4xf32>, tensor) -> tensor<16xf32> - func.return %0 : tensor<16xf32> - } - - // CHECK-LABEL: "op_einsum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_einsum(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.einsum_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.einsum_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab,bc->ac"> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.einsum"(%arg0, %arg1) { -@@ -1286,22 +1380,25 @@ - } - - // CHECK-LABEL: "op_exponential_minus_one" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_exponential_minus_one(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.exponential_minus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.exponential_minus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.exponential_minus_one"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_exponential" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_exponential(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.exponential_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.exponential_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.exponential"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_fft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { -- // CHECK: "vhlo.fft_v1"(%arg0) <{ -+ // CHECK: "vhlo.fft_v1"(%[[ARG0]]) <{ - // CHECK-SAME: fft_length = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: fft_type = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.complex_v1>) -> !vhlo.tensor_v1<16x!vhlo.complex_v1> -@@ -1313,8 +1410,9 @@ - } - - // CHECK-LABEL: "op_floor" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_floor(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.floor_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.floor_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.floor"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } -@@ -1327,16 +1425,17 @@ - // CHECK-SAME: sym_name = #vhlo.string_v1<"op_func">, - // CHECK-SAME: sym_visibility = #vhlo.string_v1<"private"> - // CHECK-SAME: }> ({ -- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1): -- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1): -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : () -> () - - func.return %arg0 : tensor - } - - // CHECK-LABEL: "op_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> { -- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -1358,8 +1457,9 @@ - } - - // CHECK-LABEL: "op_get_dimension_size" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_get_dimension_size(%arg0: tensor) -> tensor { -- // CHECK: 
"vhlo.get_dimension_size_v1"(%arg0) <{ -+ // CHECK: "vhlo.get_dimension_size_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.get_dimension_size"(%arg0) { -@@ -1369,8 +1469,9 @@ - } - - // CHECK-LABEL: "op_get_tuple_element" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_get_tuple_element(%arg0: tuple, tensor>) -> tensor { -- // CHECK: "vhlo.get_tuple_element_v1"(%arg0) <{ -+ // CHECK: "vhlo.get_tuple_element_v1"(%[[ARG0]]) <{ - // CHECK-SAME: index = #vhlo.integer_v1<0 : i32> - // CHECK-SAME: }> : (!vhlo.tuple_v1, !vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.get_tuple_element"(%arg0) { -@@ -1380,11 +1481,12 @@ - } - - // CHECK-LABEL: "op_if" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_if(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.if_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.if_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }, { -- // CHECK-NEXT: "vhlo.return_v1"(%arg2) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG2]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.if"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -1395,15 +1497,17 @@ - } - - // CHECK-LABEL: "op_imag" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_imag(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.imag_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.imag_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.imag"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_infeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.infeed_v1"(%arg0) <{ -+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{ - // CHECK-SAME: infeed_config = #vhlo.string_v1<"foo">, - // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[#vhlo.array_v1<[]>]> - // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) -@@ -1426,36 +1530,41 @@ - } - - // CHECK-LABEL: "op_is_finite" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_is_finite(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.is_finite_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.is_finite_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.is_finite"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_log" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_log(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.log_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.log_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.log"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_log_plus_one" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_log_plus_one(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.log_plus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.log_plus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.log_plus_one"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_logistic" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_logistic(%arg0: tensor) -> tensor { -- // CHECK: 
"vhlo.logistic_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.logistic_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.logistic"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_map" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_map(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.map_v1"(%arg0) <{ -+ // CHECK: "vhlo.map_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> ({ - // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1): -@@ -1473,57 +1582,65 @@ - } - - // CHECK-LABEL: "op_maximum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_maximum(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.maximum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.maximum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.maximum"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_minimum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_minimum(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.minimum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.minimum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.minimum"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_multiply" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_multiply(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.multiply_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.multiply_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.multiply"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_negate" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_negate(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.negate_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.negate_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.negate"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_not" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_not(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.not_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.not_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.not"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_optimization_barrier" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_optimization_barrier(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.optimization_barrier_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.optimization_barrier_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.optimization_barrier"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_or" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_or(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.or_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.or_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = 
"stablehlo.or"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_outfeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: outfeed_config = #vhlo.string_v1<"foo"> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.outfeed"(%arg0, %arg1) { -@@ -1533,8 +1650,9 @@ - } - - // CHECK-LABEL: "op_pad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_pad(%arg0: tensor<8xf32>, %arg1: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.pad_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.pad_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: edge_padding_high = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: edge_padding_low = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: interior_padding = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1548,36 +1666,41 @@ - } - - // CHECK-LABEL: "op_popcnt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_popcnt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.popcnt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.popcnt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.popcnt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_power" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_power(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.power_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.power_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.power"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_real_dynamic_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}) - func.func @op_real_dynamic_slice(%arg0: tensor, %arg1: tensor<1xindex>, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.real_dynamic_slice_v1"(%arg0, %arg1, %arg2, %arg3) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.real_dynamic_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.real_dynamic_slice"(%arg0, %arg1, %arg2, %arg3) : (tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_real" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_real(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.real_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.real_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.real"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_recv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.recv_v1"(%arg0) <{ -+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<3 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ 
-@@ -1590,8 +1713,9 @@
- }
-
- // CHECK-LABEL: "op_reduce"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_reduce(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.reduce_v1"(%arg0, %arg1)
-+ // CHECK: "vhlo.reduce_v1"(%[[ARG0]], %[[ARG1]])
- // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1):
- // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> ()
- // CHECK: }) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-@@ -1606,8 +1730,9 @@
- }
-
- // CHECK-LABEL: "op_reduce_precision"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_reduce_precision(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.reduce_precision_v1"(%arg0) <{
-+ // CHECK: "vhlo.reduce_precision_v1"(%[[ARG0]]) <{
- // CHECK-SAME: exponent_bits = #vhlo.integer_v1<8 : i32>
- // CHECK-SAME: mantissa_bits = #vhlo.integer_v1<10 : i32>
- // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-@@ -1619,8 +1744,9 @@
- }
-
- // CHECK-LABEL: "op_reduce_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{
-+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
- // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64>
-@@ -1644,8 +1770,9 @@
- }
-
- // CHECK-LABEL: "op_reduce_window"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x9x16x7xf32> {
-- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>,
- // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>,
- // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>,
-@@ -1671,8 +1798,9 @@
- }
-
- // CHECK-LABEL: "op_remainder"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_remainder(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.remainder_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.remainder_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.remainder"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-@@ -1692,16 +1820,18 @@
- }
-
- // CHECK-LABEL: "op_reshape"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_reshape(%arg0: tensor<16xf32>) -> tensor<4x4xf32> {
-- // CHECK: "vhlo.reshape_v1"(%arg0) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1>
-+ // CHECK: "vhlo.reshape_v1"(%[[ARG0]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1>
- %0 = "stablehlo.reshape"(%arg0) : (tensor<16xf32>) -> tensor<4x4xf32>
- func.return %0 : tensor<4x4xf32>
- }
-
- // CHECK-LABEL: "op_return"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_return(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.case_v1"(%arg0) ({
-- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> ()
-+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({
-+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.case"(%arg0) ({
- "stablehlo.return"(%arg1) : (tensor) -> ()
-@@ -1710,8 +1840,9 @@
- }
-
- // CHECK-LABEL: "op_reverse"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_reverse(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.reverse_v1"(%arg0) <{
-+ // CHECK: "vhlo.reverse_v1"(%[[ARG0]]) <{
- // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
- %0 = "stablehlo.reverse"(%arg0) {
-@@ -1721,8 +1852,9 @@
- }
-
- // CHECK-LABEL: "op_rng_bit_generator"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_rng_bit_generator(%arg0: tensor) -> (tensor, tensor) {
-- // CHECK: "vhlo.rng_bit_generator_v1"(%arg0) <{
-+ // CHECK: "vhlo.rng_bit_generator_v1"(%[[ARG0]]) <{
- // CHECK-SAME: rng_algorithm = #vhlo
- // CHECK-SAME: }> : (!vhlo.tensor_v1) -> (!vhlo.tensor_v1, !vhlo.tensor_v1)
- %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
-@@ -1732,8 +1864,9 @@
- }
-
- // CHECK-LABEL: "op_rng"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_rng(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor {
-- // CHECK: "vhlo.rng_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.rng_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: rng_distribution = #vhlo
- // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<0x!vhlo.index_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) {
-@@ -1743,29 +1876,33 @@
- }
-
- // CHECK-LABEL: "op_round_nearest_afz"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_round_nearest_afz(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.round_nearest_afz_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.round_nearest_afz_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.round_nearest_afz"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_round_nearest_even"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_round_nearest_even(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.round_nearest_even_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.round_nearest_even_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.round_nearest_even"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_rsqrt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_rsqrt(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.rsqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.rsqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.rsqrt"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> {
-- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
- // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
-@@ -1795,8 +1932,9 @@
- }
-
- // CHECK-LABEL: "op_select_and_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<12x13x13x66xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> {
-- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>,
- // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>,
- // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>>
-@@ -1826,15 +1964,17 @@
- }
-
- // CHECK-LABEL: "op_select"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_select(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor {
-- // CHECK: "vhlo.select_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.select_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.select"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_send"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME: channel_type = #vhlo.integer_v1<2 : i64>,
- // CHECK-SAME: is_host_transfer = #vhlo.bool_v1
-@@ -1847,8 +1987,9 @@
- }
-
- // CHECK-LABEL: "op_set_dimension_size"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_set_dimension_size(%arg0: tensor, %arg1: tensor) -> tensor<16xf32> {
-- // CHECK: "vhlo.set_dimension_size_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.set_dimension_size_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
- %0 = "stablehlo.set_dimension_size"(%arg0, %arg1) {
-@@ -1858,43 +1999,49 @@
- }
-
- // CHECK-LABEL: "op_shift_left"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_shift_left(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.shift_left_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.shift_left_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.shift_left"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_shift_right_arithmetic"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_shift_right_arithmetic(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.shift_right_arithmetic_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.shift_right_arithmetic_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.shift_right_arithmetic"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_shift_right_logical"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_shift_right_logical(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.shift_right_logical_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.shift_right_logical_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.shift_right_logical"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_sign"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sign(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.sign_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.sign_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.sign"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_sine"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sine(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.sine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.sine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.sine"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_slice"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_slice(%arg0: tensor<16xf32>) -> tensor<4xf32> {
-- // CHECK: "vhlo.slice_v1"(%arg0) <{
-+ // CHECK: "vhlo.slice_v1"(%[[ARG0]]) <{
- // CHECK-SAME: limit_indices = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: start_indices = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: strides = #vhlo.tensor_v1 : tensor<1xi64>>
-@@ -1908,8 +2055,9 @@
- }
-
- // CHECK-LABEL: "op_sort"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.sort_v1"(%arg0) <{
-+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{
- // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: is_stable = #vhlo.bool_v1
- // CHECK-SAME: }> ({
-@@ -1929,29 +2077,33 @@
- }
-
- // CHECK-LABEL: "op_sqrt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sqrt(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.sqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.sqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.sqrt"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_subtract"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_subtract(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.subtract_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.subtract_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.subtract"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_tanh"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_tanh(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.tanh_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.tanh_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.tanh"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_torch_index_select"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_torch_index_select(%arg0: tensor<5x1x5xf32>, %arg1: tensor<2xi32>) -> tensor<2x1x5xf32> {
-- // CHECK: "vhlo.torch_index_select_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.torch_index_select_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: batch_dims = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: dim = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<5x1x5x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.i32_v1>) -> !vhlo.tensor_v1<2x1x5x!vhlo.f32_v1>
-@@ -1963,8 +2115,9 @@
- }
-
- // CHECK-LABEL: "op_trace"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_trace(%arg0: tensor) {
-- // CHECK: "vhlo.trace_v1"(%arg0) <{
-+ // CHECK: "vhlo.trace_v1"(%[[ARG0]]) <{
- // CHECK-SAME: tag = #vhlo.string_v1<"foo">
- // CHECK-SAME: }> : (!vhlo.tensor_v1) -> ()
- "stablehlo.trace"(%arg0) {
-@@ -1974,8 +2127,9 @@
- }
-
- // CHECK-LABEL: "op_transpose"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_transpose(%arg0: tensor<16x8xf32>) -> tensor<8x16xf32> {
-- // CHECK: "vhlo.transpose_v1"(%arg0) <{
-+ // CHECK: "vhlo.transpose_v1"(%[[ARG0]]) <{
- // CHECK-SAME: permutation = #vhlo.tensor_v1 : tensor<2xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x16x!vhlo.f32_v1>
- %0 = "stablehlo.transpose"(%arg0) {
-@@ -1985,8 +2139,9 @@
- }
-
- // CHECK-LABEL: "op_triangular_solve"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_triangular_solve(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
-- // CHECK: "vhlo.triangular_solve_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.triangular_solve_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: left_side = #vhlo.bool_v1,
- // CHECK-SAME: lower = #vhlo.bool_v1,
- // CHECK-SAME: transpose_a = #vhlo,
-@@ -2002,15 +2157,17 @@
- }
-
- // CHECK-LABEL: "op_tuple"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_tuple(%arg0: tensor) -> tuple> {
-- // CHECK: "vhlo.tuple_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1>
-+ // CHECK: "vhlo.tuple_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1>
- %0 = "stablehlo.tuple"(%arg0) : (tensor) -> tuple>
- func.return %0 : tuple>
- }
-
- // CHECK-LABEL: "op_unary_einsum"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_unary_einsum(%arg0: tensor<8x16xf32>) -> tensor<8xf32> {
-- // CHECK: "vhlo.unary_einsum_v1"(%arg0) <{
-+ // CHECK: "vhlo.unary_einsum_v1"(%[[ARG0]]) <{
- // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab->a">
- // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x!vhlo.f32_v1>
- %0 = "stablehlo.unary_einsum"(%arg0) {
-@@ -2020,22 +2177,25 @@
- }
-
- // CHECK-LABEL: "op_uniform_dequantize"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_uniform_dequantize(%arg0: tensor>) -> tensor {
-- // CHECK: "vhlo.uniform_dequantize_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.uniform_dequantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.uniform_dequantize"(%arg0) : (tensor>) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_uniform_quantize"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_uniform_quantize(%arg0: tensor) -> tensor> {
-- // CHECK: "vhlo.uniform_quantize_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1>
-+ // CHECK: "vhlo.uniform_quantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1>
- %0 = "stablehlo.uniform_quantize"(%arg0) : (tensor) -> tensor>
- func.return %0 : tensor>
- }
-
- // CHECK-LABEL: "op_while"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_while(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.while_v1"(%arg0) ({
-+ // CHECK: "vhlo.while_v1"(%[[ARG0]]) ({
- // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1):
- // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }, {
-@@ -2053,8 +2213,9 @@
- }
-
- // CHECK-LABEL: "op_xor"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_xor(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.xor_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.xor_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.xor"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-@@ -2062,190 +2223,217 @@
- // ============ TYPES ============
-
- // CHECK-LABEL: "type_i1"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i1(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i4"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i4(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i8"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i8(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i16(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i32(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i64(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_ui4"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui4(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_ui8"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui8(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
"stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui32(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui64(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E4M3FN" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E4M3FN(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E5M2" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E5M2(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E4M3FNUZ" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E4M3FNUZ(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E4M3B11FNUZ" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E4M3B11FNUZ(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E5M2FNUZ" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E5M2FNUZ(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : 
(!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_bf16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_bf16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f32(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f64(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_complex_f32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_complex_f32(%arg0: tensor>, %arg1: tensor>) -> tensor> { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "type_complex_f64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_complex_f64(%arg0: tensor>, %arg1: tensor>) -> tensor> { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "type_dynamism_ranked" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_dynamism_ranked(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_quantization" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func 
@type_quantization(%arg0: tensor>, %arg1: tensor>) -> tensor> { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor> - func.return %0 : tensor> - } - - // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>> - // CHECK-LABEL: "type_token_callee" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_token_callee(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.token_v1) -> () -+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> () - return %arg0 : !stablehlo.token - } - - // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>> - // CHECK-LABEL: "type_token_caller" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_token_caller(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.call_v1"(%arg0) <{callee = #vhlo.string_v1<"type_token_callee">} -+ // CHECK: "vhlo.call_v1"(%[[ARG0]]) <{callee = #vhlo.string_v1<"type_token_callee">} - // CHECK-SAME: (!vhlo.token_v1) -> !vhlo.token_v1 - %0 = func.call @type_token_callee(%arg0) : (!stablehlo.token) -> !stablehlo.token - return %0 : !stablehlo.token - } - - // CHECK-LABEL: "type_tuple" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_tuple(%arg0: tuple>) -> tuple { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo" -diff --ruN a/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_12_0.mlir b/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_12_0.mlir ---- stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_12_0.mlir -+++ stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_12_0.mlir -@@ -13,6 +13,7 @@ - // ============ ATTRIBUTES ============ - - // CHECK-LABEL: "attr_comparison_direction_eq" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_eq(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -22,6 +23,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_ne" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_ne(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -31,6 +33,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_ge" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_ge(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -40,6 +43,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_gt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_gt(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -49,6 +53,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_le" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_le(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -58,6 +63,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_lt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_lt(%arg0: 
- func.func @attr_comparison_direction_lt(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -67,6 +73,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_notype"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_notype(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo
-@@ -76,6 +83,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_float"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_float(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo,
-@@ -86,6 +94,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_totalorder"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_totalorder(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo,
-@@ -96,6 +105,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_signed"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_signed(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo,
-@@ -106,6 +116,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_unsigned"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_unsigned(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo,
-@@ -118,6 +129,7 @@
- // ConvDimensionNumbers aka #stablehlo.conv is covered below.
-
- // CHECK-LABEL: "attr_custom_call_api_version_unspecified"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_custom_call_api_version_unspecified(%arg0: tensor) -> tensor {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo",
-@@ -128,6 +140,7 @@
- }
-
- // CHECK-LABEL: "attr_custom_call_api_version_original"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_custom_call_api_version_original(%arg0: tensor) -> tensor {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo",
-@@ -138,6 +151,7 @@
- }
-
- // CHECK-LABEL: "attr_custom_call_api_version_status_returning"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_custom_call_api_version_status_returning(%arg0: tensor) -> tensor {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo",
-@@ -148,6 +162,7 @@
- }
-
- // CHECK-LABEL: "attr_custom_call_api_version_status_returning_unified"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_custom_call_api_version_status_returning_unified(%arg0: tensor) -> tensor {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo",
-@@ -160,6 +175,7 @@
- // DotDimensionNumbers aka #stablehlo.dot is covered below.
-
- // CHECK-LABEL: "attr_fft_type_fft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -170,6 +186,7 @@
- }
-
- // CHECK-LABEL: "attr_fft_type_ifft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_ifft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -180,6 +197,7 @@
- }
-
- // CHECK-LABEL: "attr_fft_type_rfft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_rfft(%arg0: tensor<16xf32>) -> tensor<9xcomplex> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -190,6 +208,7 @@
- }
-
- // CHECK-LABEL: "attr_fft_type_irfft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_irfft(%arg0: tensor<9xcomplex>) -> tensor<16xf32> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -202,6 +221,7 @@
- // GatherDimensionNumbers aka #stablehlo.gather is covered below.
-
- // CHECK-LABEL: "attr_precision_config_default"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_precision_config_default(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
- %0 = "stablehlo.dot"(%arg0, %arg1) {
- // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
-@@ -210,6 +230,7 @@
- }
-
- // CHECK-LABEL: "attr_precision_config_high"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_precision_config_high(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
- %0 = "stablehlo.dot"(%arg0, %arg1) {
- // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
-@@ -219,6 +240,7 @@
- }
-
- // CHECK-LABEL: "attr_precision_config_highest"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_precision_config_highest(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
- %0 = "stablehlo.dot"(%arg0, %arg1) {
- // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
-@@ -228,6 +250,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_algorithm_default"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_rng_algorithm_default(%arg0: tensor) -> (tensor, tensor) {
- %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
- // CHECK: rng_algorithm = #vhlo
-@@ -237,6 +260,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_algorithm_three_fry"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_rng_algorithm_three_fry(%arg0: tensor) -> (tensor, tensor) {
- %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
- // CHECK: rng_algorithm = #vhlo
-@@ -246,6 +270,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_algorithm_philox"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_rng_algorithm_philox(%arg0: tensor) -> (tensor, tensor) {
- %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
- // CHECK: rng_algorithm = #vhlo
-@@ -255,6 +280,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_distribution_uniform"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @attr_rng_distribution_uniform(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor {
- %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) {
- // CHECK: rng_distribution = #vhlo
-@@ -264,6 +290,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_distribution_normal"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @attr_rng_distribution_normal(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor {
- %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) {
- // CHECK: rng_distribution = #vhlo
-@@ -275,6 +302,7 @@
- // ScatterDimensionNumbers aka #stablehlo.scatter is covered below.
-
- // CHECK-LABEL: "attr_transpose_no_transpose"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_transpose_no_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
- %0 = "stablehlo.triangular_solve"(%arg0, %arg1) {
- left_side = true,
-@@ -287,6 +315,7 @@
- }
-
- // CHECK-LABEL: "attr_transpose_transpose"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_transpose_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
- %0 = "stablehlo.triangular_solve"(%arg0, %arg1) {
- left_side = true,
-@@ -299,6 +328,7 @@
- }
-
- // CHECK-LABEL: "attr_transpose_adjoint"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_transpose_adjoint(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
- %0 = "stablehlo.triangular_solve"(%arg0, %arg1) {
- left_side = true,
-@@ -313,10 +343,9 @@
- // TypeExtensionsAttr aka #stablehlo.type_extensions is covered below.
-
- // CHECK-LABEL: "attr_type_extensions_bounds"
--func.func @attr_type_extensions_bounds(
-- %arg0: tensor>)
-- -> tensor> {
-- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1>) -> ()
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
-+func.func @attr_type_extensions_bounds(%arg0: tensor>) -> tensor> {
-+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> ()
- func.return %arg0 : tensor>
- }
-
-@@ -324,8 +353,9 @@
- // ============ DEFAULTS ============
-
- // CHECK-LABEL: "default_all_gather"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> {
-- // CHECK: "vhlo.all_gather_v1"(%arg0) <{
-+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{
- // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64>
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
-@@ -339,8 +369,9 @@
- }
-
- // CHECK-LABEL: "default_all_reduce"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_all_reduce(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.all_reduce_v1"(%arg0)
-+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]])
- // CHECK-SAME: <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
-@@ -362,8 +393,9 @@
- }
-
- // CHECK-LABEL: "default_all_to_all"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> {
-- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{
-+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>,
-@@ -380,8 +412,9 @@
- }
-
- // CHECK-LABEL: "default_cholesky"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> {
-- // CHECK: "vhlo.cholesky_v1"(%arg0) <{
-+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{
- // CHECK-SAME: lower = #vhlo.bool_v1
- // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>
- %0 = "stablehlo.cholesky"(%arg0) : (tensor<1x16x16xf32>) -> tensor<1x16x16xf32>
-@@ -389,8 +422,9 @@
- }
-
- // CHECK-LABEL: 
"default_collective_permute" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: "vhlo.collective_permute_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -401,8 +435,9 @@ - } - - // CHECK-LABEL: "default_compare" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_compare(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: compare_type = #vhlo, - // CHECK-SAME: comparison_direction = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -413,8 +448,9 @@ - } - - // CHECK-LABEL: "default_convolution" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x6x6x16xf32> { -- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -442,8 +478,9 @@ - } - - // CHECK-LABEL: "default_custom_call" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_custom_call(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.custom_call_v1"(%arg0) <{ -+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{ - // CHECK-SAME: api_version = #vhlo, - // CHECK-SAME: backend_config = #vhlo.string_v1<"">, - // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">, -@@ -460,8 +497,9 @@ - } - - // CHECK-LABEL: "default_dot_general" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> { -- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, -@@ -480,8 +518,9 @@ - } - - // CHECK-LABEL: "default_dot" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.dot"(%arg0, %arg1) : (tensor<8x16xf32>, tensor<16x8xf32>) -> tensor<8x8xf32> -@@ -489,8 +528,9 @@ - } - - // CHECK-LABEL: "default_dynamic_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : 
tensor<2xi64>>, - // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>>, - // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>> -@@ -502,8 +542,9 @@ - } - - // CHECK-LABEL: "default_dynamic_conv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> { -- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -531,8 +572,9 @@ - } - - // CHECK-LABEL: "default_dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> { -- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -558,15 +600,16 @@ - // CHECK-SAME: sym_name = #vhlo.string_v1<"default_func">, - // CHECK-SAME: sym_visibility = #vhlo.string_v1<""> - // CHECK-SAME: }> ({ -- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1): -- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1): -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : () -> () - func.return %arg0 : tensor - } - - // CHECK-LABEL: "dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> { -- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -587,8 +630,9 @@ - } - - // CHECK-LABEL: "default_infeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.infeed_v1"(%arg0) <{ -+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{ - // CHECK-SAME: infeed_config = #vhlo.string_v1<"">, - // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[]> - // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) -@@ -597,8 +641,9 @@ - } - - // CHECK-LABEL: "default_outfeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: outfeed_config = #vhlo.string_v1<""> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.outfeed"(%arg0, %arg1) : (tensor, !stablehlo.token) -> !stablehlo.token -@@ -606,8 +651,9 @@ - } - - // CHECK-LABEL: "default_recv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_recv(%arg0: !stablehlo.token) -> (tensor, 
!stablehlo.token) { -- // CHECK: "vhlo.recv_v1"(%arg0) <{ -+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -619,8 +665,9 @@ - } - - // CHECK-LABEL: "default_send" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -632,8 +679,9 @@ - } - - // CHECK-LABEL: "default_reduce_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64> -@@ -655,8 +703,9 @@ - } - - // CHECK-LABEL: "default_reduce_window" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x16x30x7xf32> { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, -@@ -678,8 +727,9 @@ - } - - // CHECK-LABEL: "default_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> { -- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, - // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>, -@@ -707,8 +757,9 @@ - } - - // CHECK-LABEL: "default_select_and_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<10x23x23x64xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> { -- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>> -@@ -736,8 +787,9 @@ - } - - // CHECK-LABEL: "default_sort" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.sort_v1"(%arg0) <{ -+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<-1 : i64> - // CHECK-SAME: is_stable = #vhlo.bool_v1 - // CHECK-SAME: }> ({ -@@ -756,29 +808,33 @@ - // ============ OPS ============ - - // CHECK-LABEL: "op_abs" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func 
@op_abs(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_add" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_add(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_after_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_after_all(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.after_all_v1"(%arg0) : (!vhlo.token_v1) -> !vhlo.token_v1 -+ // CHECK: "vhlo.after_all_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.after_all"(%arg0) : (!stablehlo.token) -> !stablehlo.token - func.return %0 : !stablehlo.token - } - - // CHECK-LABEL: "op_all_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.all_gather_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{ - // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64> - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -794,8 +850,9 @@ - } - - // CHECK-LABEL: "op_all_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_reduce(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.all_reduce_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: use_global_device_ids = #vhlo.bool_v1 -@@ -817,8 +874,9 @@ - } - - // CHECK-LABEL: "op_all_to_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> { -- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>, -@@ -836,22 +894,25 @@ - } - - // CHECK-LABEL: "op_and" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_and(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_atan2" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_atan2(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.atan2_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.atan2_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.atan2"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_batch_norm_grad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, 
%[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_batch_norm_grad(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16x16x16x16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { -- // CHECK: "vhlo.batch_norm_grad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ -+ // CHECK: "vhlo.batch_norm_grad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -@@ -863,8 +924,9 @@ - } - - // CHECK-LABEL: "op_batch_norm_inference" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_batch_norm_inference(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16xf32>) -> tensor<16x16x16x16xf32> { -- // CHECK: "vhlo.batch_norm_inference_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ -+ // CHECK: "vhlo.batch_norm_inference_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1> -@@ -876,8 +938,9 @@ - } - - // CHECK-LABEL: "op_batch_norm_training" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_batch_norm_training(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { -- // CHECK: "vhlo.batch_norm_training_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.batch_norm_training_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -@@ -889,15 +952,17 @@ - } - - // CHECK-LABEL: "op_bitcast_convert" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_bitcast_convert(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.bitcast_convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.bitcast_convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.bitcast_convert"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_broadcast_in_dim(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.broadcast_in_dim_v1"(%arg0) <{ -+ // CHECK: "vhlo.broadcast_in_dim_v1"(%[[ARG0]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> 
!vhlo.tensor_v1<16x16x!vhlo.f32_v1> - %0 = "stablehlo.broadcast_in_dim"(%arg0) { -@@ -907,8 +972,9 @@ - } - - // CHECK-LABEL: "op_broadcast" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_broadcast(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.broadcast_v1"(%arg0) <{ -+ // CHECK: "vhlo.broadcast_v1"(%[[ARG0]]) <{ - // CHECK-SAME: broadcast_sizes = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1> - %0 = "stablehlo.broadcast"(%arg0) { -@@ -918,9 +984,10 @@ - } - - // CHECK-LABEL: "op_case" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_case(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.case_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.case"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -929,22 +996,25 @@ - } - - // CHECK-LABEL: "op_cbrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cbrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cbrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.cbrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cbrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_ceil" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_ceil(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.ceil_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.ceil_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.ceil"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_cholesky" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> { -- // CHECK: "vhlo.cholesky_v1"(%arg0) <{ -+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{ - // CHECK-SAME: lower = #vhlo.bool_v1 - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1> - %0 = "stablehlo.cholesky"(%arg0) { -@@ -954,22 +1024,25 @@ - } - - // CHECK-LABEL: "op_clamp" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_clamp(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.clamp_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.clamp_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.clamp"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_count_leading_zeros" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_count_leading_zeros(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.count_leading_zeros_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.count_leading_zeros_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.count_leading_zeros"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_collective_permute" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: "vhlo.collective_permute_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{ - // CHECK-SAME: 
channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -981,8 +1054,9 @@ - } - - // CHECK-LABEL: "op_compare" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_compare(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: compare_type = #vhlo, - // CHECK-SAME: comparison_direction = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -994,15 +1068,17 @@ - } - - // CHECK-LABEL: "op_complex" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_complex(%arg0: tensor, %arg1: tensor) -> tensor> { -- // CHECK: "vhlo.complex_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.complex_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1> - %0 = "stablehlo.complex"(%arg0, %arg1) : (tensor, tensor) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "op_concatenate" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_concatenate(%arg0: tensor<8xf32>, %arg1: tensor<8xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.concatenate_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.concatenate_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x!vhlo.f32_v1>, !vhlo.tensor_v1<8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.concatenate"(%arg0, %arg1) { -@@ -1012,6 +1088,7 @@ - } - - // CHECK-LABEL: "op_constant" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_constant(%arg0: tensor) -> tensor { - // CHECK: "vhlo.constant_v1"() <{ - // CHECK-SAME: value = #vhlo.tensor_v1 : tensor> -@@ -1023,15 +1100,17 @@ - } - - // CHECK-LABEL: "op_convert" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_convert(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.convert"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_convolution" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x7x7x16xf32> { -- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -1065,8 +1144,9 @@ - } - - // CHECK-LABEL: "op_cosine" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cosine(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cosine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.cosine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cosine"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } -@@ -1079,8 +1159,9 @@ - } - - // CHECK-LABEL: "op_cross_replica_sum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cross_replica_sum(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cross-replica-sum_v1"(%arg0) <{ -+ // CHECK: "vhlo.cross-replica-sum_v1"(%[[ARG0]]) <{ - // 
CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cross-replica-sum"(%arg0) { -@@ -1090,8 +1171,9 @@ - } - - // CHECK-LABEL: "op_custom_call" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_custom_call(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.custom_call_v1"(%arg0) <{ -+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{ - // CHECK-SAME: api_version = #vhlo, - // CHECK-SAME: backend_config = #vhlo.string_v1<"\08\03\1A\02">, - // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">, -@@ -1122,15 +1204,17 @@ - } - - // CHECK-LABEL: "op_divide" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_divide(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.divide_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.divide_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.divide"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dot_general" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> { -- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, -@@ -1150,8 +1234,9 @@ - } - - // CHECK-LABEL: "op_dot" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.dot"(%arg0, %arg1) { -@@ -1161,8 +1246,9 @@ - } - - // CHECK-LABEL: "op_dynamic_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1176,8 +1262,9 @@ - } - - // CHECK-LABEL: "op_dynamic_conv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> { -- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -1211,8 +1298,9 @@ - } - - // CHECK-LABEL: "op_dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - 
func.func @op_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> { -- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -1232,8 +1320,9 @@ - } - - // CHECK-LABEL: "op_dynamic_iota" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_dynamic_iota(%arg0: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_iota_v1"(%arg0) <{ -+ // CHECK: "vhlo.dynamic_iota_v1"(%[[ARG0]]) <{ - // CHECK-SAME: iota_dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_iota"(%arg0) { -@@ -1243,22 +1332,25 @@ - } - - // CHECK-LABEL: "op_dynamic_pad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_dynamic_pad(%arg0: tensor, %arg1: tensor, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>, %arg4: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_pad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.dynamic_pad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_pad"(%arg0, %arg1, %arg2, %arg3, %arg4) : (tensor, tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dynamic_reshape" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_reshape(%arg0: tensor<16xf32>, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_reshape_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.dynamic_reshape_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_reshape"(%arg0, %arg1) : (tensor<16xf32>, tensor<2xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dynamic_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_slice(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor<4xf32> { -- // CHECK: "vhlo.dynamic_slice_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_slice_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: slice_sizes = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<4x!vhlo.f32_v1> - %0 = "stablehlo.dynamic_slice"(%arg0, %arg1) { -@@ -1268,15 +1360,17 @@ - } - - // CHECK-LABEL: "op_dynamic_update_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_update_slice(%arg0: tensor<16xf32>, %arg1: tensor<4xf32>, %arg2: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.dynamic_update_slice_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> -+ // CHECK: "vhlo.dynamic_update_slice_v1"(%[[ARG0]], 
%[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.dynamic_update_slice"(%arg0, %arg1, %arg2) : (tensor<16xf32>, tensor<4xf32>, tensor) -> tensor<16xf32> - func.return %0 : tensor<16xf32> - } - - // CHECK-LABEL: "op_einsum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_einsum(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.einsum_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.einsum_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab,bc->ac"> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.einsum"(%arg0, %arg1) { -@@ -1286,22 +1380,25 @@ - } - - // CHECK-LABEL: "op_exponential_minus_one" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_exponential_minus_one(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.exponential_minus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.exponential_minus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.exponential_minus_one"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_exponential" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_exponential(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.exponential_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.exponential_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.exponential"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_fft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { -- // CHECK: "vhlo.fft_v1"(%arg0) <{ -+ // CHECK: "vhlo.fft_v1"(%[[ARG0]]) <{ - // CHECK-SAME: fft_length = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: fft_type = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.complex_v1>) -> !vhlo.tensor_v1<16x!vhlo.complex_v1> -@@ -1313,8 +1410,9 @@ - } - - // CHECK-LABEL: "op_floor" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_floor(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.floor_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.floor_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.floor"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } -@@ -1327,16 +1425,17 @@ - // CHECK-SAME: sym_name = #vhlo.string_v1<"op_func">, - // CHECK-SAME: sym_visibility = #vhlo.string_v1<"private"> - // CHECK-SAME: }> ({ -- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1): -- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1): -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : () -> () - - func.return %arg0 : tensor - } - - // CHECK-LABEL: "op_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> { -- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -1358,8 +1457,9 @@ - } - - // CHECK-LABEL: "op_get_dimension_size" -+// CHECK-NEXT: 
(%[[ARG0:.*]]: {{.*}}) - func.func @op_get_dimension_size(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.get_dimension_size_v1"(%arg0) <{ -+ // CHECK: "vhlo.get_dimension_size_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.get_dimension_size"(%arg0) { -@@ -1369,8 +1469,9 @@ - } - - // CHECK-LABEL: "op_get_tuple_element" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_get_tuple_element(%arg0: tuple, tensor>) -> tensor { -- // CHECK: "vhlo.get_tuple_element_v1"(%arg0) <{ -+ // CHECK: "vhlo.get_tuple_element_v1"(%[[ARG0]]) <{ - // CHECK-SAME: index = #vhlo.integer_v1<0 : i32> - // CHECK-SAME: }> : (!vhlo.tuple_v1, !vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.get_tuple_element"(%arg0) { -@@ -1380,11 +1481,12 @@ - } - - // CHECK-LABEL: "op_if" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_if(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.if_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.if_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }, { -- // CHECK-NEXT: "vhlo.return_v1"(%arg2) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG2]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.if"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -1395,15 +1497,17 @@ - } - - // CHECK-LABEL: "op_imag" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_imag(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.imag_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.imag_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.imag"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_infeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.infeed_v1"(%arg0) <{ -+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{ - // CHECK-SAME: infeed_config = #vhlo.string_v1<"foo">, - // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[#vhlo.array_v1<[]>]> - // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) -@@ -1426,36 +1530,41 @@ - } - - // CHECK-LABEL: "op_is_finite" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_is_finite(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.is_finite_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.is_finite_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.is_finite"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_log" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_log(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.log_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.log_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.log"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_log_plus_one" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_log_plus_one(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.log_plus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.log_plus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.log_plus_one"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_logistic" -+// 
CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_logistic(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.logistic_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.logistic_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.logistic"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_map" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_map(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.map_v1"(%arg0) <{ -+ // CHECK: "vhlo.map_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> ({ - // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1): -@@ -1473,57 +1582,65 @@ - } - - // CHECK-LABEL: "op_maximum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_maximum(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.maximum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.maximum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.maximum"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_minimum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_minimum(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.minimum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.minimum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.minimum"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_multiply" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_multiply(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.multiply_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.multiply_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.multiply"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_negate" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_negate(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.negate_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.negate_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.negate"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_not" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_not(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.not_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.not_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.not"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_optimization_barrier" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_optimization_barrier(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.optimization_barrier_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.optimization_barrier_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.optimization_barrier"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_or" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_or(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.or_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: 
"vhlo.or_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.or"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_outfeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: outfeed_config = #vhlo.string_v1<"foo"> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.outfeed"(%arg0, %arg1) { -@@ -1533,8 +1650,9 @@ - } - - // CHECK-LABEL: "op_pad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_pad(%arg0: tensor<8xf32>, %arg1: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.pad_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.pad_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: edge_padding_high = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: edge_padding_low = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: interior_padding = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1548,36 +1666,41 @@ - } - - // CHECK-LABEL: "op_popcnt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_popcnt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.popcnt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.popcnt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.popcnt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_power" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_power(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.power_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.power_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.power"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_real_dynamic_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}) - func.func @op_real_dynamic_slice(%arg0: tensor, %arg1: tensor<1xindex>, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.real_dynamic_slice_v1"(%arg0, %arg1, %arg2, %arg3) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.real_dynamic_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.real_dynamic_slice"(%arg0, %arg1, %arg2, %arg3) : (tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_real" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_real(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.real_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.real_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.real"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_recv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.recv_v1"(%arg0) <{ -+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: 
channel_type = #vhlo.integer_v1<3 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -1590,8 +1713,9 @@ - } - - // CHECK-LABEL: "op_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_reduce(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.reduce_v1"(%arg0, %arg1) -+ // CHECK: "vhlo.reduce_v1"(%[[ARG0]], %[[ARG1]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -1606,8 +1730,9 @@ - } - - // CHECK-LABEL: "op_reduce_precision" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reduce_precision(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.reduce_precision_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_precision_v1"(%[[ARG0]]) <{ - // CHECK-SAME: exponent_bits = #vhlo.integer_v1<8 : i32> - // CHECK-SAME: mantissa_bits = #vhlo.integer_v1<10 : i32> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -1619,8 +1744,9 @@ - } - - // CHECK-LABEL: "op_reduce_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64> -@@ -1644,8 +1770,9 @@ - } - - // CHECK-LABEL: "op_reduce_window" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x9x16x7xf32> { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, -@@ -1671,8 +1798,9 @@ - } - - // CHECK-LABEL: "op_remainder" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_remainder(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.remainder_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.remainder_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.remainder"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } -@@ -1692,16 +1820,18 @@ - } - - // CHECK-LABEL: "op_reshape" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reshape(%arg0: tensor<16xf32>) -> tensor<4x4xf32> { -- // CHECK: "vhlo.reshape_v1"(%arg0) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1> -+ // CHECK: "vhlo.reshape_v1"(%[[ARG0]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1> - %0 = "stablehlo.reshape"(%arg0) : (tensor<16xf32>) -> tensor<4x4xf32> - func.return %0 : tensor<4x4xf32> - } - - // CHECK-LABEL: "op_return" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_return(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.case_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) 
-> !vhlo.tensor_v1
- %0 = "stablehlo.case"(%arg0) ({
- "stablehlo.return"(%arg1) : (tensor) -> ()
-@@ -1710,8 +1840,9 @@
- }
-
- // CHECK-LABEL: "op_reverse"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_reverse(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.reverse_v1"(%arg0) <{
-+ // CHECK: "vhlo.reverse_v1"(%[[ARG0]]) <{
- // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
- %0 = "stablehlo.reverse"(%arg0) {
-@@ -1721,8 +1852,9 @@
- }
-
- // CHECK-LABEL: "op_rng_bit_generator"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_rng_bit_generator(%arg0: tensor) -> (tensor, tensor) {
-- // CHECK: "vhlo.rng_bit_generator_v1"(%arg0) <{
-+ // CHECK: "vhlo.rng_bit_generator_v1"(%[[ARG0]]) <{
- // CHECK-SAME: rng_algorithm = #vhlo
- // CHECK-SAME: }> : (!vhlo.tensor_v1) -> (!vhlo.tensor_v1, !vhlo.tensor_v1)
- %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
-@@ -1732,8 +1864,9 @@
- }
-
- // CHECK-LABEL: "op_rng"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_rng(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor {
-- // CHECK: "vhlo.rng_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.rng_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: rng_distribution = #vhlo
- // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<0x!vhlo.index_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) {
-@@ -1743,29 +1876,33 @@
- }
-
- // CHECK-LABEL: "op_round_nearest_afz"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_round_nearest_afz(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.round_nearest_afz_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.round_nearest_afz_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.round_nearest_afz"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_round_nearest_even"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_round_nearest_even(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.round_nearest_even_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.round_nearest_even_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.round_nearest_even"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_rsqrt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_rsqrt(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.rsqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.rsqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.rsqrt"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> {
-- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
- // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
-@@ -1795,8 +1932,9 @@
- }
-
- // CHECK-LABEL: "op_select_and_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<12x13x13x66xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> {
-- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>,
- // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>,
- // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>>
-@@ -1826,15 +1964,17 @@
- }
-
- // CHECK-LABEL: "op_select"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_select(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor {
-- // CHECK: "vhlo.select_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.select_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.select"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_send"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME: channel_type = #vhlo.integer_v1<2 : i64>,
- // CHECK-SAME: is_host_transfer = #vhlo.bool_v1
-@@ -1847,8 +1987,9 @@
- }
-
- // CHECK-LABEL: "op_set_dimension_size"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_set_dimension_size(%arg0: tensor, %arg1: tensor) -> tensor<16xf32> {
-- // CHECK: "vhlo.set_dimension_size_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.set_dimension_size_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
- %0 = "stablehlo.set_dimension_size"(%arg0, %arg1) {
-@@ -1858,43 +1999,49 @@
- }
-
- // CHECK-LABEL: "op_shift_left"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_shift_left(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.shift_left_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.shift_left_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.shift_left"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_shift_right_arithmetic"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_shift_right_arithmetic(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.shift_right_arithmetic_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.shift_right_arithmetic_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.shift_right_arithmetic"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_shift_right_logical"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_shift_right_logical(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.shift_right_logical_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.shift_right_logical_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.shift_right_logical"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_sign"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sign(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.sign_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.sign_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.sign"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_sine"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sine(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.sine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.sine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.sine"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_slice"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_slice(%arg0: tensor<16xf32>) -> tensor<4xf32> {
-- // CHECK: "vhlo.slice_v1"(%arg0) <{
-+ // CHECK: "vhlo.slice_v1"(%[[ARG0]]) <{
- // CHECK-SAME: limit_indices = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: start_indices = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: strides = #vhlo.tensor_v1 : tensor<1xi64>>
-@@ -1908,8 +2055,9 @@
- }
-
- // CHECK-LABEL: "op_sort"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.sort_v1"(%arg0) <{
-+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{
- // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: is_stable = #vhlo.bool_v1
- // CHECK-SAME: }> ({
-@@ -1929,29 +2077,33 @@
- }
-
- // CHECK-LABEL: "op_sqrt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sqrt(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.sqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.sqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.sqrt"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_subtract"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_subtract(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.subtract_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.subtract_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.subtract"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_tanh"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_tanh(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.tanh_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.tanh_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.tanh"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_torch_index_select"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_torch_index_select(%arg0: tensor<5x1x5xf32>, %arg1: tensor<2xi32>) -> tensor<2x1x5xf32> {
-- // CHECK: "vhlo.torch_index_select_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.torch_index_select_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: batch_dims = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: dim = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<5x1x5x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.i32_v1>) -> !vhlo.tensor_v1<2x1x5x!vhlo.f32_v1>
-@@ -1963,8 +2115,9 @@
- }
-
- // CHECK-LABEL: "op_trace"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_trace(%arg0: tensor) {
-- // CHECK: "vhlo.trace_v1"(%arg0) <{
-+ // CHECK: "vhlo.trace_v1"(%[[ARG0]]) <{
- // CHECK-SAME: tag = #vhlo.string_v1<"foo">
- // CHECK-SAME: }> : (!vhlo.tensor_v1) -> ()
- "stablehlo.trace"(%arg0) {
-@@ -1974,8 +2127,9 @@
- }
-
- // CHECK-LABEL: "op_transpose"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_transpose(%arg0: tensor<16x8xf32>) -> tensor<8x16xf32> {
-- // CHECK: "vhlo.transpose_v1"(%arg0) <{
-+ // CHECK: "vhlo.transpose_v1"(%[[ARG0]]) <{
- // CHECK-SAME: permutation = #vhlo.tensor_v1 : tensor<2xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x16x!vhlo.f32_v1>
- %0 = "stablehlo.transpose"(%arg0) {
-@@ -1985,8 +2139,9 @@
- }
-
- // CHECK-LABEL: "op_triangular_solve"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_triangular_solve(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
-- // CHECK: "vhlo.triangular_solve_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.triangular_solve_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: left_side = #vhlo.bool_v1,
- // CHECK-SAME: lower = #vhlo.bool_v1,
- // CHECK-SAME: transpose_a = #vhlo,
-@@ -2002,15 +2157,17 @@
- }
-
- // CHECK-LABEL: "op_tuple"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_tuple(%arg0: tensor) -> tuple> {
-- // CHECK: "vhlo.tuple_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1>
-+ // CHECK: "vhlo.tuple_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1>
- %0 = "stablehlo.tuple"(%arg0) : (tensor) -> tuple>
- func.return %0 : tuple>
- }
-
- // CHECK-LABEL: "op_unary_einsum"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_unary_einsum(%arg0: tensor<8x16xf32>) -> tensor<8xf32> {
-- // CHECK: "vhlo.unary_einsum_v1"(%arg0) <{
-+ // CHECK: "vhlo.unary_einsum_v1"(%[[ARG0]]) <{
- // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab->a">
- // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x!vhlo.f32_v1>
- %0 = "stablehlo.unary_einsum"(%arg0) {
-@@ -2020,22 +2177,25 @@
- }
-
- // CHECK-LABEL: "op_uniform_dequantize"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_uniform_dequantize(%arg0: tensor>) -> tensor {
-- // CHECK: "vhlo.uniform_dequantize_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.uniform_dequantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.uniform_dequantize"(%arg0) : (tensor>) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_uniform_quantize"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_uniform_quantize(%arg0: tensor) -> tensor> {
-- // CHECK: "vhlo.uniform_quantize_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1>
-+ // CHECK: "vhlo.uniform_quantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1>
- %0 = "stablehlo.uniform_quantize"(%arg0) : (tensor) -> tensor>
- func.return %0 : tensor>
- }
-
- // CHECK-LABEL: "op_while"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_while(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.while_v1"(%arg0) ({
-+ // CHECK: "vhlo.while_v1"(%[[ARG0]]) ({
- // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1):
- // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }, {
-@@ -2053,8 +2213,9 @@
- }
-
- // CHECK-LABEL: "op_xor"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_xor(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.xor_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.xor_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.xor"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-@@ -2062,190 +2223,217 @@
- // ============ TYPES ============
-
- // CHECK-LABEL: "type_i1"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i1(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i4"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i4(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i8"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i8(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i16(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i32(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_i64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i64(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_ui4"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui4(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_ui8"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui8(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_ui16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui16(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_ui32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui32(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_ui64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui64(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f8E4M3FN"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E4M3FN(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f8E5M2"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E5M2(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f8E4M3FNUZ"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E4M3FNUZ(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f8E4M3B11FNUZ"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E4M3B11FNUZ(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f8E5M2FNUZ"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E5M2FNUZ(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_bf16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_bf16(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f16(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f32(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f64(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_complex_f32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_complex_f32(%arg0: tensor>, %arg1: tensor>) -> tensor> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor>
- func.return %0 : tensor>
- }
-
- // CHECK-LABEL: "type_complex_f64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_complex_f64(%arg0: tensor>, %arg1: tensor>) -> tensor> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor>
- func.return %0 : tensor>
- }
-
- // CHECK-LABEL: "type_dynamism_ranked"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_dynamism_ranked(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_quantization"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_quantization(%arg0: tensor>, %arg1: tensor>) -> tensor> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor>
- func.return %0 : tensor>
- }
-
- // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>>
- // CHECK-LABEL: "type_token_callee"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_token_callee(%arg0: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.token_v1) -> ()
-+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> ()
- return %arg0 : !stablehlo.token
- }
-
- // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>>
- // CHECK-LABEL: "type_token_caller"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_token_caller(%arg0: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.call_v1"(%arg0) <{callee = #vhlo.string_v1<"type_token_callee">}
-+ // CHECK: "vhlo.call_v1"(%[[ARG0]]) <{callee = #vhlo.string_v1<"type_token_callee">}
- // CHECK-SAME: (!vhlo.token_v1) -> !vhlo.token_v1
- %0 = func.call @type_token_callee(%arg0) : (!stablehlo.token) -> !stablehlo.token
- return %0 : !stablehlo.token
- }
-
- // CHECK-LABEL: "type_tuple"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_tuple(%arg0: tuple>) -> tuple {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo"
-diff --ruN a/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_13_0.mlir b/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_13_0.mlir
---- stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_13_0.mlir
-+++ stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_13_0.mlir
-@@ -13,6 +13,7 @@
- // ============ ATTRIBUTES ============
-
- // CHECK-LABEL: "attr_comparison_direction_eq"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_eq(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -22,6 +23,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_ne"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_ne(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -31,6 +33,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_ge"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_ge(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -40,6 +43,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_gt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_gt(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -49,6 +53,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_le"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_le(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -58,6 +63,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_lt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_lt(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -67,6 +73,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_notype"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_notype(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo
-@@ -76,6 +83,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_float"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_float(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo,
-@@ -86,6 +94,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_totalorder"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_totalorder(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo,
-@@ -96,6 +105,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_signed"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_signed(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo,
-@@ -106,6 +116,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_unsigned"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_unsigned(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo,
-@@ -118,6 +129,7 @@
- // ConvDimensionNumbers aka #stablehlo.conv is covered below.
-
- // CHECK-LABEL: "attr_custom_call_api_version_unspecified"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_custom_call_api_version_unspecified(%arg0: tensor) -> tensor {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo",
-@@ -128,6 +140,7 @@
- }
-
- // CHECK-LABEL: "attr_custom_call_api_version_original"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_custom_call_api_version_original(%arg0: tensor) -> tensor {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo",
-@@ -138,6 +151,7 @@
- }
-
- // CHECK-LABEL: "attr_custom_call_api_version_status_returning"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_custom_call_api_version_status_returning(%arg0: tensor) -> tensor {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo",
-@@ -148,6 +162,7 @@
- }
-
- // CHECK-LABEL: "attr_custom_call_api_version_status_returning_unified"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_custom_call_api_version_status_returning_unified(%arg0: tensor) -> tensor {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo",
-@@ -160,6 +175,7 @@
- // DotDimensionNumbers aka #stablehlo.dot is covered below.
-
- // CHECK-LABEL: "attr_fft_type_fft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -170,6 +186,7 @@
- }
-
- // CHECK-LABEL: "attr_fft_type_ifft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_ifft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -180,6 +197,7 @@
- }
-
- // CHECK-LABEL: "attr_fft_type_rfft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_rfft(%arg0: tensor<16xf32>) -> tensor<9xcomplex> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -190,6 +208,7 @@
- }
-
- // CHECK-LABEL: "attr_fft_type_irfft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_irfft(%arg0: tensor<9xcomplex>) -> tensor<16xf32> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -202,6 +221,7 @@
- // GatherDimensionNumbers aka #stablehlo.gather is covered below.
-
- // CHECK-LABEL: "attr_precision_config_default"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_precision_config_default(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
- %0 = "stablehlo.dot"(%arg0, %arg1) {
- // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
-@@ -210,6 +230,7 @@
- }
-
- // CHECK-LABEL: "attr_precision_config_high"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_precision_config_high(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
- %0 = "stablehlo.dot"(%arg0, %arg1) {
- // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
-@@ -219,6 +240,7 @@
- }
-
- // CHECK-LABEL: "attr_precision_config_highest"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_precision_config_highest(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
- %0 = "stablehlo.dot"(%arg0, %arg1) {
- // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
-@@ -228,6 +250,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_algorithm_default"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_rng_algorithm_default(%arg0: tensor) -> (tensor, tensor) {
- %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
- // CHECK: rng_algorithm = #vhlo
-@@ -237,6 +260,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_algorithm_three_fry"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_rng_algorithm_three_fry(%arg0: tensor) -> (tensor, tensor) {
- %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
- // CHECK: rng_algorithm = #vhlo
-@@ -246,6 +270,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_algorithm_philox"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_rng_algorithm_philox(%arg0: tensor) -> (tensor, tensor) {
- %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
- // CHECK: rng_algorithm = #vhlo
-@@ -255,6 +280,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_distribution_uniform"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @attr_rng_distribution_uniform(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor {
- %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) {
- // CHECK: rng_distribution = #vhlo
-@@ -264,6 +290,7 @@
- }
-
- // CHECK-LABEL: "attr_rng_distribution_normal"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @attr_rng_distribution_normal(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor {
- %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) {
- // CHECK: rng_distribution = #vhlo
-@@ -275,6 +302,7 @@
- // ScatterDimensionNumbers aka #stablehlo.scatter is covered below.
-
- // CHECK-LABEL: "attr_transpose_no_transpose"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_transpose_no_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
- %0 = "stablehlo.triangular_solve"(%arg0, %arg1) {
- left_side = true,
-@@ -287,6 +315,7 @@
- }
-
- // CHECK-LABEL: "attr_transpose_transpose"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_transpose_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
- %0 = "stablehlo.triangular_solve"(%arg0, %arg1) {
- left_side = true,
-@@ -299,6 +328,7 @@
- }
-
- // CHECK-LABEL: "attr_transpose_adjoint"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_transpose_adjoint(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
- %0 = "stablehlo.triangular_solve"(%arg0, %arg1) {
- left_side = true,
-@@ -313,10 +343,9 @@
- // TypeExtensionsAttr aka #stablehlo.type_extensions is covered below.
-
- // CHECK-LABEL: "attr_type_extensions_bounds"
--func.func @attr_type_extensions_bounds(
-- %arg0: tensor>)
-- -> tensor> {
-- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1>) -> ()
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
-+func.func @attr_type_extensions_bounds(%arg0: tensor>) -> tensor> {
-+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> ()
- func.return %arg0 : tensor>
- }
-
-@@ -324,8 +353,9 @@
- // ============ DEFAULTS ============
-
- // CHECK-LABEL: "default_all_gather"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> {
-- // CHECK: "vhlo.all_gather_v1"(%arg0) <{
-+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{
- // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64>
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
-@@ -339,8 +369,9 @@
- }
-
- // CHECK-LABEL: "default_all_reduce"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_all_reduce(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.all_reduce_v1"(%arg0)
-+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]])
- // CHECK-SAME: <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
-@@ -362,8 +393,9 @@
- }
-
- // CHECK-LABEL: "default_all_to_all"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> {
-- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{
-+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>,
-@@ -380,8 +412,9 @@
- }
-
- // CHECK-LABEL: "default_cholesky"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> {
-- // CHECK: "vhlo.cholesky_v1"(%arg0) <{
-+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{
- // CHECK-SAME: lower = #vhlo.bool_v1
- // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>
- %0 = "stablehlo.cholesky"(%arg0) : (tensor<1x16x16xf32>) -> tensor<1x16x16xf32>
-@@ -389,8 +422,9 @@
- }
-
- // CHECK-LABEL: "default_collective_permute"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> {
-- // CHECK: "vhlo.collective_permute_v1"(%arg0) <{
-+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1>
-@@ -401,8 +435,9 @@
- }
-
- // CHECK-LABEL: "default_compare"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_compare(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: compare_type = #vhlo,
- // CHECK-SAME: comparison_direction = #vhlo
- // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-@@ -413,8 +448,9 @@
- }
-
- // CHECK-LABEL: "default_convolution"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x6x6x16xf32> {
-- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>,
-@@ -442,8 +478,9 @@
- }
-
- // CHECK-LABEL: "default_custom_call"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_custom_call(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.custom_call_v1"(%arg0) <{
-+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{
- // CHECK-SAME: api_version = #vhlo,
- // CHECK-SAME: backend_config = #vhlo.string_v1<"">,
- // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">,
-@@ -460,8 +497,9 @@
- }
-
- // CHECK-LABEL: "default_dot_general"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> {
-- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>,
-@@ -480,8 +518,9 @@
- }
-
- // CHECK-LABEL: "default_dot"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
-- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1>
- %0 = "stablehlo.dot"(%arg0, %arg1) : (tensor<8x16xf32>, tensor<16x8xf32>) -> tensor<8x8xf32>
-@@ -489,8 +528,9 @@
- }
-
- // CHECK-LABEL: "default_dynamic_broadcast_in_dim"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor {
-- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>,
- // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>>,
- // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>>
-@@ -502,8 +542,9 @@
- }
-
- // CHECK-LABEL: "default_dynamic_conv"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @default_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> {
-- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>,
-@@ -531,8 +572,9 @@
- }
-
- // CHECK-LABEL: "default_dynamic_gather"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @default_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> {
-- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
- // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>,
- // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
-@@ -558,15 +600,16 @@
- // CHECK-SAME: sym_name = #vhlo.string_v1<"default_func">,
- // CHECK-SAME: sym_visibility = #vhlo.string_v1<"">
- // CHECK-SAME: }> ({
-- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1):
-- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> ()
-+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1):
-+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }) : () -> ()
- func.return %arg0 : tensor
- }
-
- // CHECK-LABEL: "dynamic_gather"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> {
-- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
- // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>,
- // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
-@@ -587,8 +630,9 @@
- }
-
- // CHECK-LABEL: "default_infeed"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) {
-- // CHECK: "vhlo.infeed_v1"(%arg0) <{
-+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{
- // CHECK-SAME: infeed_config = #vhlo.string_v1<"">,
- // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[]>
- // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1)
-@@ -597,8 +641,9 @@
- }
-
- // CHECK-LABEL: "default_outfeed"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: outfeed_config = #vhlo.string_v1<"">
- // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1
- %0 = "stablehlo.outfeed"(%arg0, %arg1) : (tensor, !stablehlo.token) -> !stablehlo.token
-@@ -606,8 +651,9 @@
- }
-
- // CHECK-LABEL: "default_recv"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) {
-- // CHECK: "vhlo.recv_v1"(%arg0) <{
-+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: is_host_transfer = #vhlo.bool_v1
-@@ -619,8 +665,9 @@
- }
-
- // CHECK-LABEL: "default_send"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: is_host_transfer = #vhlo.bool_v1
-@@ -632,8 +679,9 @@
- }
-
- // CHECK-LABEL: "default_reduce_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{
-+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
- // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64>
-@@ -655,8 +703,9 @@
- }
-
- // CHECK-LABEL: "default_reduce_window"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x16x30x7xf32> {
-- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>,
- // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>,
- // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>,
-@@ -678,8 +727,9 @@
- }
-
- // CHECK-LABEL: "default_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @default_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> {
-- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
- // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
-@@ -707,8 +757,9 @@
- }
-
- // CHECK-LABEL: "default_select_and_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @default_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<10x23x23x64xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> {
-- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>,
- // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>,
- // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>>
-@@ -736,8 +787,9 @@
- }
-
- // CHECK-LABEL: "default_sort"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.sort_v1"(%arg0) <{
-+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{
- // CHECK-SAME: dimension = #vhlo.integer_v1<-1 : i64>
- // CHECK-SAME: is_stable = #vhlo.bool_v1
- // CHECK-SAME: }> ({
-@@ -756,29 +808,33 @@
- // ============ OPS ============
-
- // CHECK-LABEL: "op_abs"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_abs(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_add"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_add(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_after_all"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_after_all(%arg0: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.after_all_v1"(%arg0) : (!vhlo.token_v1) -> !vhlo.token_v1
-+ // CHECK: "vhlo.after_all_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> !vhlo.token_v1
- %0 = "stablehlo.after_all"(%arg0) : (!stablehlo.token) -> !stablehlo.token
- func.return %0 : !stablehlo.token
- }
-
- // CHECK-LABEL: "op_all_gather"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> {
-- // CHECK: "vhlo.all_gather_v1"(%arg0) <{
-+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{
- // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64>
- // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
-@@ -794,8 +850,9 @@
- }
-
- // CHECK-LABEL: "op_all_reduce"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_all_reduce(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.all_reduce_v1"(%arg0) <{
-+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
- // CHECK-SAME: use_global_device_ids = #vhlo.bool_v1
-@@ -817,8 +874,9 @@
- }
-
- // CHECK-LABEL: "op_all_to_all"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> {
-- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{
-+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>,
-@@ -836,22 +894,25 @@
- }
-
- // CHECK-LABEL: "op_and"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_and(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_atan2"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_atan2(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.atan2_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.atan2_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.atan2"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_batch_norm_grad"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}})
- func.func @op_batch_norm_grad(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16x16x16x16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) {
-- // CHECK: "vhlo.batch_norm_grad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{
-+ // CHECK: "vhlo.batch_norm_grad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{
- // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>,
- // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>)
-@@ -863,8 +924,9 @@
- }
-
- // CHECK-LABEL: "op_batch_norm_inference"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}})
- func.func @op_batch_norm_inference(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16xf32>) -> tensor<16x16x16x16xf32> {
-- // CHECK: "vhlo.batch_norm_inference_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{
-+ // CHECK: "vhlo.batch_norm_inference_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{
- // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>,
- // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>
-@@ -876,8 +938,9 @@
- }
-
- // CHECK-LABEL: "op_batch_norm_training"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_batch_norm_training(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) {
-- // CHECK: "vhlo.batch_norm_training_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.batch_norm_training_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>,
- // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>)
-@@ -889,15 +952,17 @@
- }
-
- // CHECK-LABEL: "op_bitcast_convert"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_bitcast_convert(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.bitcast_convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.bitcast_convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.bitcast_convert"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_broadcast_in_dim"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_broadcast_in_dim(%arg0: tensor<16xf32>) -> tensor<16x16xf32> {
-- // CHECK: "vhlo.broadcast_in_dim_v1"(%arg0) <{
-+ // CHECK: "vhlo.broadcast_in_dim_v1"(%[[ARG0]]) <{
- // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1>
- %0 = "stablehlo.broadcast_in_dim"(%arg0) {
-@@ -907,8 +972,9 @@
- }
-
- // CHECK-LABEL: "op_broadcast"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_broadcast(%arg0: tensor<16xf32>) -> tensor<16x16xf32> {
-- // CHECK: "vhlo.broadcast_v1"(%arg0) <{
-+ // CHECK: "vhlo.broadcast_v1"(%[[ARG0]]) <{
- // CHECK-SAME: broadcast_sizes = #vhlo.tensor_v1 : tensor<1xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1>
- %0 = "stablehlo.broadcast"(%arg0) {
-@@ -918,9 +984,10 @@
- }
-
- // CHECK-LABEL: "op_case"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_case(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.case_v1"(%arg0) ({
-- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> ()
-+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({
-+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.case"(%arg0) ({
- "stablehlo.return"(%arg1) : (tensor) -> ()
-@@ -929,22 +996,25 @@
- }
-
- // CHECK-LABEL: "op_cbrt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_cbrt(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.cbrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.cbrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.cbrt"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_ceil"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_ceil(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.ceil_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.ceil_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.ceil"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_cholesky"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> {
-- // CHECK: "vhlo.cholesky_v1"(%arg0) <{
-+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{
- // CHECK-SAME: lower = #vhlo.bool_v1
- // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>
- %0 = "stablehlo.cholesky"(%arg0) {
-@@ -954,22 +1024,25 @@
- }
-
- // CHECK-LABEL: "op_clamp"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_clamp(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor {
-- // CHECK: "vhlo.clamp_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.clamp_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.clamp"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_count_leading_zeros"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_count_leading_zeros(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.count_leading_zeros_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.count_leading_zeros_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.count_leading_zeros"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_collective_permute"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> {
-- // CHECK: "vhlo.collective_permute_v1"(%arg0) <{
-+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1>
-@@ -981,8 +1054,9 @@
- }
-
- // CHECK-LABEL: "op_compare"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_compare(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: compare_type = #vhlo,
- // CHECK-SAME: comparison_direction = #vhlo
- // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-@@ -994,15 +1068,17 @@
- }
-
- // CHECK-LABEL: "op_complex"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_complex(%arg0: tensor, %arg1: tensor) -> tensor> {
-- // CHECK: "vhlo.complex_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1>
-+ // CHECK: "vhlo.complex_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1>
- %0 = "stablehlo.complex"(%arg0, %arg1) : (tensor, tensor) -> tensor>
- func.return %0 : tensor>
- }
-
- // CHECK-LABEL: "op_concatenate"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_concatenate(%arg0: tensor<8xf32>, %arg1: tensor<8xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.concatenate_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.concatenate_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<8x!vhlo.f32_v1>, !vhlo.tensor_v1<8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
- %0 = "stablehlo.concatenate"(%arg0, %arg1) {
-@@ -1012,6 +1088,7 @@
- }
-
- // CHECK-LABEL: "op_constant"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_constant(%arg0: tensor) -> tensor {
- // CHECK: "vhlo.constant_v1"() <{
- // CHECK-SAME: value = #vhlo.tensor_v1 : tensor>
-@@ -1023,15 +1100,17 @@
- }
-
- // CHECK-LABEL: "op_convert"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_convert(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.convert"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_convolution"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x7x7x16xf32> {
-- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>,
-@@ -1065,8 +1144,9 @@
- }
-
- // CHECK-LABEL: "op_cosine"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_cosine(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.cosine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.cosine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.cosine"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-@@ -1079,8 +1159,9 @@
- }
-
- // CHECK-LABEL: "op_cross_replica_sum"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_cross_replica_sum(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.cross-replica-sum_v1"(%arg0) <{
-+ // CHECK: "vhlo.cross-replica-sum_v1"(%[[ARG0]]) <{
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.cross-replica-sum"(%arg0) {
-@@ -1090,8 +1171,9 @@
- }
-
- // CHECK-LABEL: "op_custom_call"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_custom_call(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.custom_call_v1"(%arg0) <{
-+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{
- // CHECK-SAME: api_version = #vhlo,
- // CHECK-SAME: backend_config = #vhlo.string_v1<"\08\03\1A\02">,
- // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">,
-@@ -1122,15 +1204,17 @@
- }
-
- // CHECK-LABEL: "op_divide"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_divide(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.divide_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.divide_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.divide"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_dot_general"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> {
-- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>,
-@@ -1150,8 +1234,9 @@
- }
-
- // CHECK-LABEL: "op_dot"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
-- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1>
- %0 = "stablehlo.dot"(%arg0, %arg1) {
-@@ -1161,8 +1246,9 @@
- }
-
- // CHECK-LABEL: "op_dynamic_broadcast_in_dim"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor {
-- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>,
- // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>
-@@ -1176,8 +1262,9 @@
- }
-
- // CHECK-LABEL: "op_dynamic_conv"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> {
-- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>,
-@@ -1211,8 +1298,9 @@
- }
-
- // CHECK-LABEL: "op_dynamic_gather"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> {
-- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
- // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>,
- // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
-@@ -1232,8 +1320,9 @@
- }
-
- // CHECK-LABEL: "op_dynamic_iota"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_dynamic_iota(%arg0: tensor<1xindex>) -> tensor {
-- // CHECK: "vhlo.dynamic_iota_v1"(%arg0) <{
-+ // CHECK: "vhlo.dynamic_iota_v1"(%[[ARG0]]) <{
- // CHECK-SAME: iota_dimension = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.dynamic_iota"(%arg0) {
-@@ -1243,22 +1332,25 @@
- }
-
- // CHECK-LABEL: "op_dynamic_pad"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}})
- func.func @op_dynamic_pad(%arg0: tensor, %arg1: tensor, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>, %arg4: tensor<1xindex>) -> tensor {
-- // CHECK: "vhlo.dynamic_pad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.dynamic_pad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.dynamic_pad"(%arg0, %arg1, %arg2, %arg3, %arg4) : (tensor, tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_dynamic_reshape"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_dynamic_reshape(%arg0: tensor<16xf32>, %arg1: tensor<2xindex>) -> tensor {
-- // CHECK: "vhlo.dynamic_reshape_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.dynamic_reshape_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.dynamic_reshape"(%arg0, %arg1) : (tensor<16xf32>, tensor<2xindex>) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_dynamic_slice"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_dynamic_slice(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor<4xf32> {
-- // CHECK: "vhlo.dynamic_slice_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.dynamic_slice_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: slice_sizes = #vhlo.tensor_v1 : tensor<1xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<4x!vhlo.f32_v1>
- %0 = "stablehlo.dynamic_slice"(%arg0, %arg1) {
-@@ -1268,15 +1360,17 @@
- }
-
- // CHECK-LABEL: "op_dynamic_update_slice"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_dynamic_update_slice(%arg0: tensor<16xf32>, %arg1: tensor<4xf32>, %arg2: tensor) -> tensor<16xf32> {
-- // CHECK: "vhlo.dynamic_update_slice_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
-+ // CHECK: "vhlo.dynamic_update_slice_v1"(%[[ARG0]],
%[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.dynamic_update_slice"(%arg0, %arg1, %arg2) : (tensor<16xf32>, tensor<4xf32>, tensor) -> tensor<16xf32> - func.return %0 : tensor<16xf32> - } - - // CHECK-LABEL: "op_einsum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_einsum(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.einsum_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.einsum_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab,bc->ac"> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.einsum"(%arg0, %arg1) { -@@ -1286,22 +1380,25 @@ - } - - // CHECK-LABEL: "op_exponential_minus_one" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_exponential_minus_one(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.exponential_minus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.exponential_minus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.exponential_minus_one"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_exponential" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_exponential(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.exponential_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.exponential_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.exponential"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_fft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { -- // CHECK: "vhlo.fft_v1"(%arg0) <{ -+ // CHECK: "vhlo.fft_v1"(%[[ARG0]]) <{ - // CHECK-SAME: fft_length = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: fft_type = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.complex_v1>) -> !vhlo.tensor_v1<16x!vhlo.complex_v1> -@@ -1313,8 +1410,9 @@ - } - - // CHECK-LABEL: "op_floor" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_floor(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.floor_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.floor_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.floor"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } -@@ -1327,16 +1425,17 @@ - // CHECK-SAME: sym_name = #vhlo.string_v1<"op_func">, - // CHECK-SAME: sym_visibility = #vhlo.string_v1<"private"> - // CHECK-SAME: }> ({ -- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1): -- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1): -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : () -> () - - func.return %arg0 : tensor - } - - // CHECK-LABEL: "op_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> { -- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -1358,8 +1457,9 @@ - } - - // CHECK-LABEL: "op_get_dimension_size" -+// CHECK-NEXT: 
(%[[ARG0:.*]]: {{.*}}) - func.func @op_get_dimension_size(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.get_dimension_size_v1"(%arg0) <{ -+ // CHECK: "vhlo.get_dimension_size_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.get_dimension_size"(%arg0) { -@@ -1369,8 +1469,9 @@ - } - - // CHECK-LABEL: "op_get_tuple_element" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_get_tuple_element(%arg0: tuple, tensor>) -> tensor { -- // CHECK: "vhlo.get_tuple_element_v1"(%arg0) <{ -+ // CHECK: "vhlo.get_tuple_element_v1"(%[[ARG0]]) <{ - // CHECK-SAME: index = #vhlo.integer_v1<0 : i32> - // CHECK-SAME: }> : (!vhlo.tuple_v1, !vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.get_tuple_element"(%arg0) { -@@ -1380,11 +1481,12 @@ - } - - // CHECK-LABEL: "op_if" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_if(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.if_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.if_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }, { -- // CHECK-NEXT: "vhlo.return_v1"(%arg2) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG2]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.if"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -1395,15 +1497,17 @@ - } - - // CHECK-LABEL: "op_imag" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_imag(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.imag_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.imag_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.imag"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_infeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.infeed_v1"(%arg0) <{ -+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{ - // CHECK-SAME: infeed_config = #vhlo.string_v1<"foo">, - // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[#vhlo.array_v1<[]>]> - // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) -@@ -1426,36 +1530,41 @@ - } - - // CHECK-LABEL: "op_is_finite" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_is_finite(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.is_finite_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.is_finite_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.is_finite"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_log" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_log(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.log_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.log_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.log"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_log_plus_one" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_log_plus_one(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.log_plus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.log_plus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.log_plus_one"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_logistic" -+// 
CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_logistic(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.logistic_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.logistic_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.logistic"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_map" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_map(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.map_v1"(%arg0) <{ -+ // CHECK: "vhlo.map_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> ({ - // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1): -@@ -1473,57 +1582,65 @@ - } - - // CHECK-LABEL: "op_maximum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_maximum(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.maximum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.maximum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.maximum"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_minimum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_minimum(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.minimum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.minimum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.minimum"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_multiply" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_multiply(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.multiply_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.multiply_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.multiply"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_negate" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_negate(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.negate_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.negate_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.negate"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_not" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_not(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.not_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.not_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.not"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_optimization_barrier" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_optimization_barrier(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.optimization_barrier_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.optimization_barrier_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.optimization_barrier"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_or" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_or(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.or_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: 
"vhlo.or_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.or"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_outfeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: outfeed_config = #vhlo.string_v1<"foo"> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.outfeed"(%arg0, %arg1) { -@@ -1533,8 +1650,9 @@ - } - - // CHECK-LABEL: "op_pad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_pad(%arg0: tensor<8xf32>, %arg1: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.pad_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.pad_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: edge_padding_high = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: edge_padding_low = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: interior_padding = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1548,36 +1666,41 @@ - } - - // CHECK-LABEL: "op_popcnt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_popcnt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.popcnt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.popcnt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.popcnt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_power" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_power(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.power_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.power_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.power"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_real_dynamic_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}) - func.func @op_real_dynamic_slice(%arg0: tensor, %arg1: tensor<1xindex>, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.real_dynamic_slice_v1"(%arg0, %arg1, %arg2, %arg3) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.real_dynamic_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.real_dynamic_slice"(%arg0, %arg1, %arg2, %arg3) : (tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_real" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_real(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.real_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.real_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.real"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_recv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.recv_v1"(%arg0) <{ -+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: 
channel_type = #vhlo.integer_v1<3 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -1590,8 +1713,9 @@ - } - - // CHECK-LABEL: "op_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_reduce(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.reduce_v1"(%arg0, %arg1) -+ // CHECK: "vhlo.reduce_v1"(%[[ARG0]], %[[ARG1]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -1606,8 +1730,9 @@ - } - - // CHECK-LABEL: "op_reduce_precision" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reduce_precision(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.reduce_precision_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_precision_v1"(%[[ARG0]]) <{ - // CHECK-SAME: exponent_bits = #vhlo.integer_v1<8 : i32> - // CHECK-SAME: mantissa_bits = #vhlo.integer_v1<10 : i32> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -1619,8 +1744,9 @@ - } - - // CHECK-LABEL: "op_reduce_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64> -@@ -1644,8 +1770,9 @@ - } - - // CHECK-LABEL: "op_reduce_window" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x9x16x7xf32> { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, -@@ -1671,8 +1798,9 @@ - } - - // CHECK-LABEL: "op_remainder" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_remainder(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.remainder_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.remainder_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.remainder"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } -@@ -1692,16 +1820,18 @@ - } - - // CHECK-LABEL: "op_reshape" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reshape(%arg0: tensor<16xf32>) -> tensor<4x4xf32> { -- // CHECK: "vhlo.reshape_v1"(%arg0) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1> -+ // CHECK: "vhlo.reshape_v1"(%[[ARG0]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1> - %0 = "stablehlo.reshape"(%arg0) : (tensor<16xf32>) -> tensor<4x4xf32> - func.return %0 : tensor<4x4xf32> - } - - // CHECK-LABEL: "op_return" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_return(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.case_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) 
-> !vhlo.tensor_v1 - %0 = "stablehlo.case"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -1710,8 +1840,9 @@ - } - - // CHECK-LABEL: "op_reverse" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reverse(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reverse_v1"(%arg0) <{ -+ // CHECK: "vhlo.reverse_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.reverse"(%arg0) { -@@ -1721,8 +1852,9 @@ - } - - // CHECK-LABEL: "op_rng_bit_generator" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_rng_bit_generator(%arg0: tensor) -> (tensor, tensor) { -- // CHECK: "vhlo.rng_bit_generator_v1"(%arg0) <{ -+ // CHECK: "vhlo.rng_bit_generator_v1"(%[[ARG0]]) <{ - // CHECK-SAME: rng_algorithm = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> (!vhlo.tensor_v1, !vhlo.tensor_v1) - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { -@@ -1732,8 +1864,9 @@ - } - - // CHECK-LABEL: "op_rng" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_rng(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { -- // CHECK: "vhlo.rng_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.rng_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: rng_distribution = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<0x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) { -@@ -1743,29 +1876,33 @@ - } - - // CHECK-LABEL: "op_round_nearest_afz" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_round_nearest_afz(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.round_nearest_afz_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.round_nearest_afz_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.round_nearest_afz"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_round_nearest_even" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_round_nearest_even(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.round_nearest_even_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.round_nearest_even_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.round_nearest_even"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_rsqrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_rsqrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.rsqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.rsqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.rsqrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> { -- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, - // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>, -@@ -1795,8 +1932,9 @@ - } - - // CHECK-LABEL: "op_select_and_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: 
tensor<12x13x13x66xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> { -- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>> -@@ -1826,15 +1964,17 @@ - } - - // CHECK-LABEL: "op_select" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_select(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.select_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.select_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.select"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_send" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -1847,8 +1987,9 @@ - } - - // CHECK-LABEL: "op_set_dimension_size" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_set_dimension_size(%arg0: tensor, %arg1: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.set_dimension_size_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.set_dimension_size_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.set_dimension_size"(%arg0, %arg1) { -@@ -1858,43 +1999,49 @@ - } - - // CHECK-LABEL: "op_shift_left" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_left(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.shift_left_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_left_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_left"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_shift_right_arithmetic" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_right_arithmetic(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.shift_right_arithmetic_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_right_arithmetic_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_right_arithmetic"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_shift_right_logical" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_right_logical(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.shift_right_logical_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_right_logical_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_right_logical"(%arg0, %arg1) : (tensor, tensor) -> 
tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_sign" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sign(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.sign_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.sign_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.sign"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_sine" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sine(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.sine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.sine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.sine"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_slice(%arg0: tensor<16xf32>) -> tensor<4xf32> { -- // CHECK: "vhlo.slice_v1"(%arg0) <{ -+ // CHECK: "vhlo.slice_v1"(%[[ARG0]]) <{ - // CHECK-SAME: limit_indices = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: start_indices = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: strides = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1908,8 +2055,9 @@ - } - - // CHECK-LABEL: "op_sort" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.sort_v1"(%arg0) <{ -+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: is_stable = #vhlo.bool_v1 - // CHECK-SAME: }> ({ -@@ -1929,29 +2077,33 @@ - } - - // CHECK-LABEL: "op_sqrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sqrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.sqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.sqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.sqrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_subtract" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_subtract(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.subtract_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.subtract_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.subtract"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_tanh" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_tanh(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.tanh_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.tanh_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.tanh"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_torch_index_select" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_torch_index_select(%arg0: tensor<5x1x5xf32>, %arg1: tensor<2xi32>) -> tensor<2x1x5xf32> { -- // CHECK: "vhlo.torch_index_select_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.torch_index_select_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_dims = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: dim = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<5x1x5x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.i32_v1>) -> !vhlo.tensor_v1<2x1x5x!vhlo.f32_v1> -@@ -1963,8 +2115,9 @@ - } - - // CHECK-LABEL: "op_trace" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_trace(%arg0: tensor) { -- // CHECK: "vhlo.trace_v1"(%arg0) <{ -+ // CHECK: "vhlo.trace_v1"(%[[ARG0]]) <{ - // CHECK-SAME: tag = 
#vhlo.string_v1<"foo"> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> () - "stablehlo.trace"(%arg0) { -@@ -1974,8 +2127,9 @@ - } - - // CHECK-LABEL: "op_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_transpose(%arg0: tensor<16x8xf32>) -> tensor<8x16xf32> { -- // CHECK: "vhlo.transpose_v1"(%arg0) <{ -+ // CHECK: "vhlo.transpose_v1"(%[[ARG0]]) <{ - // CHECK-SAME: permutation = #vhlo.tensor_v1 : tensor<2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x16x!vhlo.f32_v1> - %0 = "stablehlo.transpose"(%arg0) { -@@ -1985,8 +2139,9 @@ - } - - // CHECK-LABEL: "op_triangular_solve" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_triangular_solve(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.triangular_solve_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.triangular_solve_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: left_side = #vhlo.bool_v1, - // CHECK-SAME: lower = #vhlo.bool_v1, - // CHECK-SAME: transpose_a = #vhlo, -@@ -2002,15 +2157,17 @@ - } - - // CHECK-LABEL: "op_tuple" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_tuple(%arg0: tensor) -> tuple> { -- // CHECK: "vhlo.tuple_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1> -+ // CHECK: "vhlo.tuple_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1> - %0 = "stablehlo.tuple"(%arg0) : (tensor) -> tuple> - func.return %0 : tuple> - } - - // CHECK-LABEL: "op_unary_einsum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_unary_einsum(%arg0: tensor<8x16xf32>) -> tensor<8xf32> { -- // CHECK: "vhlo.unary_einsum_v1"(%arg0) <{ -+ // CHECK: "vhlo.unary_einsum_v1"(%[[ARG0]]) <{ - // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab->a"> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x!vhlo.f32_v1> - %0 = "stablehlo.unary_einsum"(%arg0) { -@@ -2020,22 +2177,25 @@ - } - - // CHECK-LABEL: "op_uniform_dequantize" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_uniform_dequantize(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.uniform_dequantize_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.uniform_dequantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.uniform_dequantize"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_uniform_quantize" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_uniform_quantize(%arg0: tensor) -> tensor> { -- // CHECK: "vhlo.uniform_quantize_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.uniform_quantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1> - %0 = "stablehlo.uniform_quantize"(%arg0) : (tensor) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "op_while" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_while(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.while_v1"(%arg0) ({ -+ // CHECK: "vhlo.while_v1"(%[[ARG0]]) ({ - // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1): - // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }, { -@@ -2053,8 +2213,9 @@ - } - - // CHECK-LABEL: "op_xor" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_xor(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.xor_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.xor_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.xor"(%arg0, %arg1) : (tensor, tensor) -> tensor - 
func.return %0 : tensor - } -@@ -2062,190 +2223,217 @@ - // ============ TYPES ============ - - // CHECK-LABEL: "type_i1" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i1(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i4" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i4(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i8" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i8(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i32(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i64(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui4" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui4(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui8" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui8(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: 
"vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui32(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui64(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E4M3FN" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E4M3FN(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E5M2" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E5M2(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E4M3FNUZ" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E4M3FNUZ(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E4M3B11FNUZ" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E4M3B11FNUZ(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E5M2FNUZ" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func 
@type_f8E5M2FNUZ(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_bf16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_bf16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f32(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f64(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_complex_f32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_complex_f32(%arg0: tensor>, %arg1: tensor>) -> tensor> { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "type_complex_f64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_complex_f64(%arg0: tensor>, %arg1: tensor>) -> tensor> { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "type_dynamism_ranked" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_dynamism_ranked(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: 
"type_quantization" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_quantization(%arg0: tensor>, %arg1: tensor>) -> tensor> { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor> - func.return %0 : tensor> - } - - // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>> - // CHECK-LABEL: "type_token_callee" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_token_callee(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.token_v1) -> () -+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> () - return %arg0 : !stablehlo.token - } - - // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>> - // CHECK-LABEL: "type_token_caller" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_token_caller(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.call_v1"(%arg0) <{callee = #vhlo.string_v1<"type_token_callee">} -+ // CHECK: "vhlo.call_v1"(%[[ARG0]]) <{callee = #vhlo.string_v1<"type_token_callee">} - // CHECK-SAME: (!vhlo.token_v1) -> !vhlo.token_v1 - %0 = func.call @type_token_callee(%arg0) : (!stablehlo.token) -> !stablehlo.token - return %0 : !stablehlo.token - } - - // CHECK-LABEL: "type_tuple" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_tuple(%arg0: tuple>) -> tuple { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo" -diff --ruN a/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_14_0.mlir b/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_14_0.mlir ---- stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_14_0.mlir -+++ stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_14_0.mlir -@@ -13,6 +13,7 @@ - // ============ ATTRIBUTES ============ - - // CHECK-LABEL: "attr_comparison_direction_eq" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_eq(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -22,6 +23,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_ne" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_ne(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -31,6 +33,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_ge" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_ge(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -40,6 +43,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_gt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_gt(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -49,6 +53,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_le" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_le(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -58,6 +63,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_lt" -+// CHECK-NEXT: 
(%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_lt(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -67,6 +73,7 @@
- }
- 
- // CHECK-LABEL: "attr_comparison_type_notype"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_notype(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo
-@@ -76,6 +83,7 @@
- }
- 
- // CHECK-LABEL: "attr_comparison_type_float"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_float(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo,
-@@ -86,6 +94,7 @@
- }
- 
- // CHECK-LABEL: "attr_comparison_type_totalorder"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_totalorder(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo,
-@@ -96,6 +105,7 @@
- }
- 
- // CHECK-LABEL: "attr_comparison_type_signed"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_signed(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo,
-@@ -106,6 +116,7 @@
- }
- 
- // CHECK-LABEL: "attr_comparison_type_unsigned"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_unsigned(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo,
-@@ -118,6 +129,7 @@
- }
- 
- // ConvDimensionNumbers aka #stablehlo.conv is covered below.
- 
- // CHECK-LABEL: "attr_custom_call_api_version_unspecified"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_custom_call_api_version_unspecified(%arg0: tensor) -> tensor {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo",
-@@ -128,6 +140,7 @@
- }
- 
- // CHECK-LABEL: "attr_custom_call_api_version_original"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_custom_call_api_version_original(%arg0: tensor) -> tensor {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo",
-@@ -138,6 +151,7 @@
- }
- 
- // CHECK-LABEL: "attr_custom_call_api_version_status_returning"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_custom_call_api_version_status_returning(%arg0: tensor) -> tensor {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo",
-@@ -148,6 +162,7 @@
- }
- 
- // CHECK-LABEL: "attr_custom_call_api_version_status_returning_unified"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_custom_call_api_version_status_returning_unified(%arg0: tensor) -> tensor {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo",
-@@ -160,6 +175,7 @@
- // DotDimensionNumbers aka #stablehlo.dot is covered below.
- 
- // CHECK-LABEL: "attr_fft_type_fft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -170,6 +186,7 @@
- }
- 
- // CHECK-LABEL: "attr_fft_type_ifft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_ifft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -180,6 +197,7 @@
- }
- 
- // CHECK-LABEL: "attr_fft_type_rfft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_rfft(%arg0: tensor<16xf32>) -> tensor<9xcomplex> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -190,6 +208,7 @@
- }
- 
- // CHECK-LABEL: "attr_fft_type_irfft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_irfft(%arg0: tensor<9xcomplex>) -> tensor<16xf32> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -202,6 +221,7 @@
- }
- 
- // GatherDimensionNumbers aka #stablehlo.gather is covered below.
- 
- // CHECK-LABEL: "attr_precision_config_default"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_precision_config_default(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
- %0 = "stablehlo.dot"(%arg0, %arg1) {
- // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
-@@ -210,6 +230,7 @@
- }
- 
- // CHECK-LABEL: "attr_precision_config_high"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_precision_config_high(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
- %0 = "stablehlo.dot"(%arg0, %arg1) {
- // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
-@@ -219,6 +240,7 @@
- }
- 
- // CHECK-LABEL: "attr_precision_config_highest"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_precision_config_highest(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
- %0 = "stablehlo.dot"(%arg0, %arg1) {
- // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
-@@ -228,6 +250,7 @@
- }
- 
- // CHECK-LABEL: "attr_rng_algorithm_default"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_rng_algorithm_default(%arg0: tensor) -> (tensor, tensor) {
- %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
- // CHECK: rng_algorithm = #vhlo
-@@ -237,6 +260,7 @@
- }
- 
- // CHECK-LABEL: "attr_rng_algorithm_three_fry"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_rng_algorithm_three_fry(%arg0: tensor) -> (tensor, tensor) {
- %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
- // CHECK: rng_algorithm = #vhlo
-@@ -246,6 +270,7 @@
- }
- 
- // CHECK-LABEL: "attr_rng_algorithm_philox"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_rng_algorithm_philox(%arg0: tensor) -> (tensor, tensor) {
- %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
- // CHECK: rng_algorithm = #vhlo
-@@ -255,6 +280,7 @@
- }
- 
- // CHECK-LABEL: "attr_rng_distribution_uniform"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @attr_rng_distribution_uniform(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor {
- %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) {
- // CHECK: rng_distribution = #vhlo
-@@ -264,6 +290,7 @@
- }
- 
- // CHECK-LABEL: "attr_rng_distribution_normal"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @attr_rng_distribution_normal(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor {
- %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) {
- // CHECK: rng_distribution = #vhlo
-@@ -275,6 +302,7 @@
- }
- 
- // ScatterDimensionNumbers aka #stablehlo.scatter is covered below.
- 
- // CHECK-LABEL: "attr_transpose_no_transpose"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_transpose_no_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
- %0 = "stablehlo.triangular_solve"(%arg0, %arg1) {
- left_side = true,
-@@ -287,6 +315,7 @@
- }
- 
- // CHECK-LABEL: "attr_transpose_transpose"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_transpose_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
- %0 = "stablehlo.triangular_solve"(%arg0, %arg1) {
- left_side = true,
-@@ -299,6 +328,7 @@
- }
- 
- // CHECK-LABEL: "attr_transpose_adjoint"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_transpose_adjoint(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
- %0 = "stablehlo.triangular_solve"(%arg0, %arg1) {
- left_side = true,
-@@ -313,10 +343,9 @@
- // TypeExtensionsAttr aka #stablehlo.type_extensions is covered below.
- 
- // CHECK-LABEL: "attr_type_extensions_bounds"
--func.func @attr_type_extensions_bounds(
-- %arg0: tensor>)
-- -> tensor> {
-- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1>) -> ()
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
-+func.func @attr_type_extensions_bounds(%arg0: tensor>) -> tensor> {
-+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> ()
- func.return %arg0 : tensor>
- }
- 
-@@ -324,8 +353,9 @@
- // ============ DEFAULTS ============
- 
- // CHECK-LABEL: "default_all_gather"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> {
-- // CHECK: "vhlo.all_gather_v1"(%arg0) <{
-+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{
- // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64>
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
-@@ -339,8 +369,9 @@
- }
- 
- // CHECK-LABEL: "default_all_reduce"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_all_reduce(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.all_reduce_v1"(%arg0)
-+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]])
- // CHECK-SAME: <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
-@@ -362,8 +393,9 @@
- }
- 
- // CHECK-LABEL: "default_all_to_all"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> {
-- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{
-+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>,
-@@ -380,8 +412,9 @@
- }
- 
- // CHECK-LABEL: "default_cholesky"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> {
-- // CHECK: "vhlo.cholesky_v1"(%arg0) <{
-+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{
- // CHECK-SAME: lower = #vhlo.bool_v1
- // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>
- %0 = "stablehlo.cholesky"(%arg0) : (tensor<1x16x16xf32>) -> tensor<1x16x16xf32>
-@@ -389,8 +422,9 @@
- }
- 
- // CHECK-LABEL: "default_collective_permute"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> {
-- // CHECK: "vhlo.collective_permute_v1"(%arg0) <{
-+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1>
-@@ -401,8 +435,9 @@
- }
- 
- // CHECK-LABEL: "default_compare"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_compare(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: compare_type = #vhlo,
- // CHECK-SAME: comparison_direction = #vhlo
- // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-@@ -413,8 +448,9 @@
- }
- 
- // CHECK-LABEL: "default_convolution"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x6x6x16xf32> {
-- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>,
-@@ -442,8 +478,9 @@
- }
- 
- // CHECK-LABEL: "default_custom_call"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_custom_call(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.custom_call_v1"(%arg0) <{
-+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{
- // CHECK-SAME: api_version = #vhlo,
- // CHECK-SAME: backend_config = #vhlo.string_v1<"">,
- // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">,
-@@ -460,8 +497,9 @@
- }
- 
- // CHECK-LABEL: "default_dot_general"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> {
-- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>,
-@@ -480,8 +518,9 @@
- }
- 
- // CHECK-LABEL: "default_dot"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
-- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1>
- %0 = "stablehlo.dot"(%arg0, %arg1) : (tensor<8x16xf32>, tensor<16x8xf32>) -> tensor<8x8xf32>
-@@ -489,8 +528,9 @@
- }
- 
- // CHECK-LABEL: "default_dynamic_broadcast_in_dim"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor {
-- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>,
- // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>>,
- // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>>
-@@ -502,8 +542,9 @@
- }
- 
- // CHECK-LABEL: "default_dynamic_conv"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @default_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> {
-- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>,
-@@ -531,8 +572,9 @@
- }
- 
- // CHECK-LABEL: "default_dynamic_gather"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @default_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> {
-- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
- // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>,
- // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
-@@ -558,15 +600,16 @@
- // CHECK-SAME: sym_name = #vhlo.string_v1<"default_func">,
- // CHECK-SAME: sym_visibility = #vhlo.string_v1<"">
- // CHECK-SAME: }> ({
-- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1):
-- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> ()
-+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1):
-+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }) : () -> ()
- func.return %arg0 : tensor
- }
- 
- // CHECK-LABEL: "dynamic_gather"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> {
-- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
- // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>,
- // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
-@@ -587,8 +630,9 @@
- }
- 
- // CHECK-LABEL: "default_infeed"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) {
-- // CHECK: "vhlo.infeed_v1"(%arg0) <{
-+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{
- // CHECK-SAME: infeed_config = #vhlo.string_v1<"">,
- // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[]>
- // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1)
-@@ -597,8 +641,9 @@
- }
- 
- // CHECK-LABEL: "default_outfeed"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: outfeed_config = #vhlo.string_v1<"">
- // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1
- %0 = "stablehlo.outfeed"(%arg0, %arg1) : (tensor, !stablehlo.token) -> !stablehlo.token
-@@ -606,8 +651,9 @@
- }
- 
- // CHECK-LABEL: "default_recv"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) {
-- // CHECK: "vhlo.recv_v1"(%arg0) <{
-+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: is_host_transfer = #vhlo.bool_v1
-@@ -619,8 +665,9 @@
- }
- 
- // CHECK-LABEL: "default_send"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: is_host_transfer = #vhlo.bool_v1
-@@ -632,8 +679,9 @@
- }
- 
- // CHECK-LABEL: "default_reduce_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{
-+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
- // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64>
-@@ -655,8 +703,9 @@
- }
- 
- // CHECK-LABEL: "default_reduce_window"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @default_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x16x30x7xf32> {
-- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>,
- // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>,
- // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>,
-@@ -678,8 +727,9 @@
- }
- 
- // CHECK-LABEL: "default_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @default_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> {
-- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
- // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
-@@ -707,8 +757,9 @@
- }
- 
- // CHECK-LABEL: "default_select_and_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @default_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<10x23x23x64xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> {
-- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>,
- // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>,
- // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>>
-@@ -736,8 +787,9 @@
- }
- 
- // CHECK-LABEL: "default_sort"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @default_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.sort_v1"(%arg0) <{
-+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{
- // CHECK-SAME: dimension = #vhlo.integer_v1<-1 : i64>
- // CHECK-SAME: is_stable = #vhlo.bool_v1
- // CHECK-SAME: }> ({
-@@ -756,29 +808,33 @@
- // ============ OPS ============
- 
- // CHECK-LABEL: "op_abs"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_abs(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_add"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_add(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_after_all"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_after_all(%arg0: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.after_all_v1"(%arg0) : (!vhlo.token_v1) -> !vhlo.token_v1
-+ // CHECK: "vhlo.after_all_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> !vhlo.token_v1
- %0 = "stablehlo.after_all"(%arg0) : (!stablehlo.token) -> !stablehlo.token
- func.return %0 : !stablehlo.token
- }
- 
- // CHECK-LABEL: "op_all_gather"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> {
-- // CHECK: "vhlo.all_gather_v1"(%arg0) <{
-+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{
- // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64>
- // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
-@@ -794,8 +850,9 @@
- }
- 
- // CHECK-LABEL: "op_all_reduce"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_all_reduce(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.all_reduce_v1"(%arg0) <{
-+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
- // CHECK-SAME: use_global_device_ids = #vhlo.bool_v1
-@@ -817,8 +874,9 @@
- }
- 
- // CHECK-LABEL: "op_all_to_all"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> {
-- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{
-+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>,
-@@ -836,22 +894,25 @@
- }
- 
- // CHECK-LABEL: "op_and"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_and(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_atan2"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_atan2(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.atan2_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.atan2_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.atan2"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_batch_norm_grad"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}})
- func.func @op_batch_norm_grad(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16x16x16x16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) {
-- // CHECK: "vhlo.batch_norm_grad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{
-+ // CHECK: "vhlo.batch_norm_grad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{
- // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>,
- // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>)
-@@ -863,8 +924,9 @@
- }
- 
- // CHECK-LABEL: "op_batch_norm_inference"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}})
- func.func @op_batch_norm_inference(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16xf32>) -> tensor<16x16x16x16xf32> {
-- // CHECK: "vhlo.batch_norm_inference_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{
-+ // CHECK: "vhlo.batch_norm_inference_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{
- // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>,
- // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>
-@@ -876,8 +938,9 @@
- }
- 
- // CHECK-LABEL: "op_batch_norm_training"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_batch_norm_training(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) {
-- // CHECK: "vhlo.batch_norm_training_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.batch_norm_training_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>,
- // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>)
-@@ -889,15 +952,17 @@
- }
- 
- // CHECK-LABEL: "op_bitcast_convert"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_bitcast_convert(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.bitcast_convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.bitcast_convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.bitcast_convert"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_broadcast_in_dim"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_broadcast_in_dim(%arg0: tensor<16xf32>) -> tensor<16x16xf32> {
-- // CHECK: "vhlo.broadcast_in_dim_v1"(%arg0) <{
-+ // CHECK: "vhlo.broadcast_in_dim_v1"(%[[ARG0]]) <{
- // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1>
- %0 = "stablehlo.broadcast_in_dim"(%arg0) {
-@@ -907,8 +972,9 @@
- }
- 
- // CHECK-LABEL: "op_broadcast"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_broadcast(%arg0: tensor<16xf32>) -> tensor<16x16xf32> {
-- // CHECK: "vhlo.broadcast_v1"(%arg0) <{
-+ // CHECK: "vhlo.broadcast_v1"(%[[ARG0]]) <{
- // CHECK-SAME: broadcast_sizes = #vhlo.tensor_v1 : tensor<1xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1>
- %0 = "stablehlo.broadcast"(%arg0) {
-@@ -918,9 +984,10 @@
- }
- 
- // CHECK-LABEL: "op_case"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_case(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.case_v1"(%arg0) ({
-- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> ()
-+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({
-+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.case"(%arg0) ({
- "stablehlo.return"(%arg1) : (tensor) -> ()
-@@ -929,22 +996,25 @@
- }
- 
- // CHECK-LABEL: "op_cbrt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_cbrt(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.cbrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.cbrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.cbrt"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_ceil"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_ceil(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.ceil_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.ceil_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.ceil"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_cholesky"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> {
-- // CHECK: "vhlo.cholesky_v1"(%arg0) <{
-+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{
- // CHECK-SAME: lower = #vhlo.bool_v1
- // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>
- %0 = "stablehlo.cholesky"(%arg0) {
-@@ -954,22 +1024,25 @@
- }
- 
- // CHECK-LABEL: "op_clamp"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_clamp(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor {
-- // CHECK: "vhlo.clamp_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.clamp_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.clamp"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_count_leading_zeros"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_count_leading_zeros(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.count_leading_zeros_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.count_leading_zeros_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.count_leading_zeros"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_collective_permute"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> {
-- // CHECK: "vhlo.collective_permute_v1"(%arg0) <{
-+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1>
-@@ -981,8 +1054,9 @@
- }
- 
- // CHECK-LABEL: "op_compare"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_compare(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: compare_type = #vhlo,
- // CHECK-SAME: comparison_direction = #vhlo
- // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-@@ -994,15 +1068,17 @@
- }
- 
- // CHECK-LABEL: "op_complex"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_complex(%arg0: tensor, %arg1: tensor) -> tensor> {
-- // CHECK: "vhlo.complex_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1>
-+ // CHECK: "vhlo.complex_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1>
- %0 = "stablehlo.complex"(%arg0, %arg1) : (tensor, tensor) -> tensor>
- func.return %0 : tensor>
- }
- 
- // CHECK-LABEL: "op_concatenate"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_concatenate(%arg0: tensor<8xf32>, %arg1: tensor<8xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.concatenate_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.concatenate_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<8x!vhlo.f32_v1>, !vhlo.tensor_v1<8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
- %0 = "stablehlo.concatenate"(%arg0, %arg1) {
-@@ -1012,6 +1088,7 @@
- }
- 
- // CHECK-LABEL: "op_constant"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_constant(%arg0: tensor) -> tensor {
- // CHECK: "vhlo.constant_v1"() <{
- // CHECK-SAME: value = #vhlo.tensor_v1 : tensor>
-@@ -1023,15 +1100,17 @@
- }
- 
- // CHECK-LABEL: "op_convert"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_convert(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.convert"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_convolution"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x7x7x16xf32> {
-- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>,
-@@ -1065,8 +1144,9 @@
- }
- 
- // CHECK-LABEL: "op_cosine"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_cosine(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.cosine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.cosine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.cosine"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-@@ -1079,8 +1159,9 @@
- }
- 
- // CHECK-LABEL: "op_cross_replica_sum"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_cross_replica_sum(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.cross-replica-sum_v1"(%arg0) <{
-+ // CHECK: "vhlo.cross-replica-sum_v1"(%[[ARG0]]) <{
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.cross-replica-sum"(%arg0) {
-@@ -1090,8 +1171,9 @@
- }
- 
- // CHECK-LABEL: "op_custom_call"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_custom_call(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.custom_call_v1"(%arg0) <{
-+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{
- // CHECK-SAME: api_version = #vhlo,
- // CHECK-SAME: backend_config = #vhlo.string_v1<"\08\03\1A\02">,
- // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">,
-@@ -1122,15 +1204,17 @@
- }
- 
- // CHECK-LABEL: "op_divide"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_divide(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.divide_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.divide_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.divide"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_dot_general"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> {
-- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>,
-@@ -1150,8 +1234,9 @@
- }
- 
- // CHECK-LABEL: "op_dot"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
-- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1>
- %0 = "stablehlo.dot"(%arg0, %arg1) {
-@@ -1161,8 +1246,9 @@
- }
- 
- // CHECK-LABEL: "op_dynamic_broadcast_in_dim"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor {
-- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>,
- // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>
-@@ -1176,8 +1262,9 @@
- }
- 
- // CHECK-LABEL: "op_dynamic_conv"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> {
-- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>,
-@@ -1211,8 +1298,9 @@
- }
- 
- // CHECK-LABEL: "op_dynamic_gather"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> {
-- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
- // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>,
- // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
-@@ -1232,8 +1320,9 @@
- }
- 
- // CHECK-LABEL: "op_dynamic_iota"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_dynamic_iota(%arg0: tensor<1xindex>) -> tensor {
-- // CHECK: "vhlo.dynamic_iota_v1"(%arg0) <{
-+ // CHECK: "vhlo.dynamic_iota_v1"(%[[ARG0]]) <{
- // CHECK-SAME: iota_dimension = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.dynamic_iota"(%arg0) {
-@@ -1243,22 +1332,25 @@
- }
- 
- // CHECK-LABEL: "op_dynamic_pad"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}})
- func.func @op_dynamic_pad(%arg0: tensor, %arg1: tensor, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>, %arg4: tensor<1xindex>) -> tensor {
-- // CHECK: "vhlo.dynamic_pad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.dynamic_pad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.dynamic_pad"(%arg0, %arg1, %arg2, %arg3, %arg4) : (tensor, tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_dynamic_reshape"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_dynamic_reshape(%arg0: tensor<16xf32>, %arg1: tensor<2xindex>) -> tensor {
-- // CHECK: "vhlo.dynamic_reshape_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.dynamic_reshape_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.dynamic_reshape"(%arg0, %arg1) : (tensor<16xf32>, tensor<2xindex>) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_dynamic_slice"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_dynamic_slice(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor<4xf32> {
-- // CHECK: "vhlo.dynamic_slice_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.dynamic_slice_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: slice_sizes = #vhlo.tensor_v1 : tensor<1xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<4x!vhlo.f32_v1>
- %0 = "stablehlo.dynamic_slice"(%arg0, %arg1) {
-@@ -1268,15 +1360,17 @@
- }
- 
- // CHECK-LABEL: "op_dynamic_update_slice"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_dynamic_update_slice(%arg0: tensor<16xf32>, %arg1: tensor<4xf32>, %arg2: tensor) -> tensor<16xf32> {
-- // CHECK: "vhlo.dynamic_update_slice_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
-+ // CHECK: "vhlo.dynamic_update_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
- %0 = "stablehlo.dynamic_update_slice"(%arg0, %arg1, %arg2) : (tensor<16xf32>, tensor<4xf32>, tensor) -> tensor<16xf32>
- func.return %0 : tensor<16xf32>
- }
- 
- // CHECK-LABEL: "op_einsum"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_einsum(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
-- // CHECK: "vhlo.einsum_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.einsum_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab,bc->ac">
- // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1>
- %0 = "stablehlo.einsum"(%arg0, %arg1) {
-@@ -1286,22 +1380,25 @@
- }
- 
- // CHECK-LABEL: "op_exponential_minus_one"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_exponential_minus_one(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.exponential_minus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.exponential_minus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.exponential_minus_one"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_exponential"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_exponential(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.exponential_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.exponential_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.exponential"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_fft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> {
-- // CHECK: "vhlo.fft_v1"(%arg0) <{
-+ // CHECK: "vhlo.fft_v1"(%[[ARG0]]) <{
- // CHECK-SAME: fft_length = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: fft_type = #vhlo
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.complex_v1>) -> !vhlo.tensor_v1<16x!vhlo.complex_v1>
-@@ -1313,8 +1410,9 @@
- }
- 
- // CHECK-LABEL: "op_floor"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_floor(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.floor_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.floor_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.floor"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-@@ -1327,16 +1425,17 @@
- // CHECK-SAME: sym_name = #vhlo.string_v1<"op_func">,
- // CHECK-SAME: sym_visibility = #vhlo.string_v1<"private">
- // CHECK-SAME: }> ({
-- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1):
-- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> ()
-+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1):
-+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }) : () -> ()
- 
- func.return %arg0 : tensor
- }
- 
- // CHECK-LABEL: "op_gather"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> {
-- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
- // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>,
- // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
-@@ -1358,8 +1457,9 @@
- }
- 
- // CHECK-LABEL: "op_get_dimension_size"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_get_dimension_size(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.get_dimension_size_v1"(%arg0) <{
-+ // CHECK: "vhlo.get_dimension_size_v1"(%[[ARG0]]) <{
- // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.get_dimension_size"(%arg0) {
-@@ -1369,8 +1469,9 @@
- }
- 
- // CHECK-LABEL: "op_get_tuple_element"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_get_tuple_element(%arg0: tuple, tensor>) -> tensor {
-- // CHECK: "vhlo.get_tuple_element_v1"(%arg0) <{
-+ // CHECK: "vhlo.get_tuple_element_v1"(%[[ARG0]]) <{
- // CHECK-SAME: index = #vhlo.integer_v1<0 : i32>
- // CHECK-SAME: }> : (!vhlo.tuple_v1, !vhlo.tensor_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.get_tuple_element"(%arg0) {
-@@ -1380,11 +1481,12 @@
- }
- 
- // CHECK-LABEL: "op_if"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_if(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor {
-- // CHECK: "vhlo.if_v1"(%arg0) ({
-- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> ()
-+ // CHECK: "vhlo.if_v1"(%[[ARG0]]) ({
-+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }, {
-- // CHECK-NEXT: "vhlo.return_v1"(%arg2) : (!vhlo.tensor_v1) -> ()
-+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG2]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.if"(%arg0) ({
- "stablehlo.return"(%arg1) : (tensor) -> ()
-@@ -1395,15 +1497,17 @@
- }
- 
- // CHECK-LABEL: "op_imag"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_imag(%arg0: tensor>) -> tensor {
-- // CHECK: "vhlo.imag_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.imag_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.imag"(%arg0) : (tensor>) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_infeed"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) {
-- // CHECK: "vhlo.infeed_v1"(%arg0) <{
-+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{
- // CHECK-SAME: infeed_config = #vhlo.string_v1<"foo">,
- // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[#vhlo.array_v1<[]>]>
- // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1)
-@@ -1426,36 +1530,41 @@
- }
- 
- // CHECK-LABEL: "op_is_finite"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_is_finite(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.is_finite_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.is_finite_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.is_finite"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_log"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_log(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.log_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.log_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.log"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_log_plus_one"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_log_plus_one(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.log_plus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.log_plus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.log_plus_one"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_logistic"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_logistic(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.logistic_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.logistic_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.logistic"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_map"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_map(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.map_v1"(%arg0) <{
-+ // CHECK: "vhlo.map_v1"(%[[ARG0]]) <{
- // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>>
- // CHECK-SAME: }> ({
- // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1):
-@@ -1473,57 +1582,65 @@
- }
- 
- // CHECK-LABEL: "op_maximum"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_maximum(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.maximum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.maximum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.maximum"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_minimum"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_minimum(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.minimum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.minimum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.minimum"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_multiply"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_multiply(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.multiply_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.multiply_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.multiply"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_negate"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_negate(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.negate_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.negate_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.negate"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_not"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_not(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.not_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.not_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.not"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_optimization_barrier"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_optimization_barrier(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.optimization_barrier_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.optimization_barrier_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.optimization_barrier"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_or"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_or(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.or_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.or_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.or"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_outfeed"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: outfeed_config = #vhlo.string_v1<"foo">
- // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1
- %0 = "stablehlo.outfeed"(%arg0, %arg1) {
-@@ -1533,8 +1650,9 @@
- }
- 
- // CHECK-LABEL: "op_pad"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_pad(%arg0: tensor<8xf32>, %arg1: tensor) -> tensor<16xf32> {
-- // CHECK: "vhlo.pad_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.pad_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: edge_padding_high = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: edge_padding_low = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: interior_padding = #vhlo.tensor_v1 : tensor<1xi64>>
-@@ -1548,36 +1666,41 @@
- }
- 
- // CHECK-LABEL: "op_popcnt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_popcnt(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.popcnt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.popcnt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.popcnt"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_power"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_power(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.power_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.power_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.power"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_real_dynamic_slice"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}})
- func.func @op_real_dynamic_slice(%arg0: tensor, %arg1: tensor<1xindex>, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>) -> tensor {
-- // CHECK: "vhlo.real_dynamic_slice_v1"(%arg0, %arg1, %arg2, %arg3) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.real_dynamic_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.real_dynamic_slice"(%arg0, %arg1, %arg2, %arg3) : (tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_real"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_real(%arg0: tensor>) -> tensor {
-- // CHECK: "vhlo.real_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.real_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.real"(%arg0) : (tensor>) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_recv"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) {
-- // CHECK: "vhlo.recv_v1"(%arg0) <{
-+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME: channel_type = #vhlo.integer_v1<3 : i64>,
- // CHECK-SAME: is_host_transfer = #vhlo.bool_v1
-@@ -1590,8 +1713,9 @@
- }
- 
- // CHECK-LABEL: "op_reduce"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_reduce(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.reduce_v1"(%arg0, %arg1)
-+ // CHECK: "vhlo.reduce_v1"(%[[ARG0]], %[[ARG1]])
- // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1):
- // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> ()
- // CHECK: }) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-@@ -1606,8 +1730,9 @@
- }
- 
- // CHECK-LABEL: "op_reduce_precision"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_reduce_precision(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.reduce_precision_v1"(%arg0) <{
-+ // CHECK: "vhlo.reduce_precision_v1"(%[[ARG0]]) <{
- // CHECK-SAME: exponent_bits = #vhlo.integer_v1<8 : i32>
- // CHECK-SAME: mantissa_bits = #vhlo.integer_v1<10 : i32>
- // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-@@ -1619,8 +1744,9 @@
- }
- 
- // CHECK-LABEL: "op_reduce_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{
-+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
- // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64>
-@@ -1644,8 +1770,9 @@
- }
- 
- // CHECK-LABEL: "op_reduce_window"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x9x16x7xf32> {
-- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>,
- // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>,
- // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>,
-@@ -1671,8 +1798,9 @@
- }
- 
- // CHECK-LABEL: "op_remainder"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_remainder(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.remainder_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.remainder_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.remainder"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-@@ -1692,16 +1820,18 @@
- }
- 
- // CHECK-LABEL: "op_reshape"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_reshape(%arg0: tensor<16xf32>) -> tensor<4x4xf32> {
-- // CHECK: "vhlo.reshape_v1"(%arg0) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1>
-+ // CHECK: "vhlo.reshape_v1"(%[[ARG0]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1>
- %0 = "stablehlo.reshape"(%arg0) : (tensor<16xf32>) -> tensor<4x4xf32>
- func.return %0 : tensor<4x4xf32>
- }
- 
- // CHECK-LABEL: "op_return"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_return(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.case_v1"(%arg0) ({
-- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> ()
-+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({
-+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.case"(%arg0) ({
- "stablehlo.return"(%arg1) : (tensor) -> ()
-@@ -1710,8 +1840,9 @@
- }
- 
- // CHECK-LABEL: "op_reverse"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_reverse(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.reverse_v1"(%arg0) <{
-+ // CHECK: "vhlo.reverse_v1"(%[[ARG0]]) <{
- // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
- %0 = "stablehlo.reverse"(%arg0) {
-@@ -1721,8 +1852,9 @@
- }
- 
- // CHECK-LABEL: "op_rng_bit_generator"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_rng_bit_generator(%arg0: tensor) -> (tensor, tensor) {
-- // CHECK: "vhlo.rng_bit_generator_v1"(%arg0) <{
-+ // CHECK: "vhlo.rng_bit_generator_v1"(%[[ARG0]]) <{
- // CHECK-SAME: rng_algorithm = #vhlo
- // CHECK-SAME: }> : (!vhlo.tensor_v1) -> (!vhlo.tensor_v1, !vhlo.tensor_v1)
- %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
-@@ -1732,8 +1864,9 @@
- }
- 
- // CHECK-LABEL: "op_rng"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_rng(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor {
-- // CHECK: "vhlo.rng_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.rng_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: rng_distribution = #vhlo
- // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<0x!vhlo.index_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) {
-@@ -1743,29 +1876,33 @@
- }
- 
- // CHECK-LABEL: "op_round_nearest_afz"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_round_nearest_afz(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.round_nearest_afz_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.round_nearest_afz_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.round_nearest_afz"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_round_nearest_even"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_round_nearest_even(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.round_nearest_even_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.round_nearest_even_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.round_nearest_even"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_rsqrt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_rsqrt(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.rsqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.rsqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.rsqrt"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> {
-- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
- // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
-@@ -1795,8 +1932,9 @@
- }
- 
- // CHECK-LABEL: "op_select_and_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<12x13x13x66xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> {
-- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{
-+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
- // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>,
- // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>,
- // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>>
-@@ -1826,15 +1964,17 @@
- }
- 
- // CHECK-LABEL: "op_select"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
- func.func @op_select(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor {
-- // CHECK: "vhlo.select_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.select_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.select"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_send"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME: channel_type = #vhlo.integer_v1<2 : i64>,
- // CHECK-SAME: is_host_transfer = #vhlo.bool_v1
-@@ -1847,8 +1987,9 @@
- }
- 
- // CHECK-LABEL: "op_set_dimension_size"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_set_dimension_size(%arg0: tensor, %arg1: tensor) -> tensor<16xf32> {
-- // CHECK: "vhlo.set_dimension_size_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.set_dimension_size_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
- %0 = "stablehlo.set_dimension_size"(%arg0, %arg1) {
-@@ -1858,43 +1999,49 @@
- }
- 
- // CHECK-LABEL: "op_shift_left"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_shift_left(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.shift_left_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.shift_left_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.shift_left"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_shift_right_arithmetic"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_shift_right_arithmetic(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.shift_right_arithmetic_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.shift_right_arithmetic_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.shift_right_arithmetic"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_shift_right_logical"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_shift_right_logical(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.shift_right_logical_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.shift_right_logical_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.shift_right_logical"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_sign"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sign(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.sign_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.sign_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.sign"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_sine"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sine(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.sine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.sine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.sine"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_slice"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_slice(%arg0: tensor<16xf32>) -> tensor<4xf32> {
-- // CHECK: "vhlo.slice_v1"(%arg0) <{
-+ // CHECK: "vhlo.slice_v1"(%[[ARG0]]) <{
- // CHECK-SAME: limit_indices = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: start_indices = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: strides = #vhlo.tensor_v1 : tensor<1xi64>>
-@@ -1908,8 +2055,9 @@
- }
- 
- // CHECK-LABEL: "op_sort"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.sort_v1"(%arg0) <{
-+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{
- // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: is_stable = #vhlo.bool_v1
- // CHECK-SAME: }> ({
-@@ -1929,29 +2077,33 @@
- }
- 
- // CHECK-LABEL: "op_sqrt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sqrt(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.sqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.sqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.sqrt"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_subtract"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_subtract(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.subtract_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.subtract_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.subtract"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_tanh"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_tanh(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.tanh_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.tanh_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.tanh"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_torch_index_select"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_torch_index_select(%arg0: tensor<5x1x5xf32>, %arg1: tensor<2xi32>) -> tensor<2x1x5xf32> {
-- // CHECK: "vhlo.torch_index_select_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.torch_index_select_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: batch_dims = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: dim = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<5x1x5x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.i32_v1>) -> !vhlo.tensor_v1<2x1x5x!vhlo.f32_v1>
-@@ -1963,8 +2115,9 @@
- }
- 
- // CHECK-LABEL: "op_trace"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_trace(%arg0: tensor) {
-- // CHECK: "vhlo.trace_v1"(%arg0) <{
-+ // CHECK: "vhlo.trace_v1"(%[[ARG0]]) <{
- // CHECK-SAME: tag = #vhlo.string_v1<"foo">
- // CHECK-SAME: }> : (!vhlo.tensor_v1) -> ()
- "stablehlo.trace"(%arg0) {
-@@ -1974,8 +2127,9 @@
- }
- 
- // CHECK-LABEL: "op_transpose"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_transpose(%arg0: tensor<16x8xf32>) -> tensor<8x16xf32> {
-- // CHECK: "vhlo.transpose_v1"(%arg0) <{
-+ // CHECK: "vhlo.transpose_v1"(%[[ARG0]]) <{
- // CHECK-SAME: permutation = #vhlo.tensor_v1 : tensor<2xi64>>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x16x!vhlo.f32_v1>
- %0 = "stablehlo.transpose"(%arg0) {
-@@ -1985,8 +2139,9 @@
- }
- 
- // CHECK-LABEL: "op_triangular_solve"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_triangular_solve(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
-- // CHECK: "vhlo.triangular_solve_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.triangular_solve_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: left_side = #vhlo.bool_v1,
- // CHECK-SAME: lower = #vhlo.bool_v1,
- // CHECK-SAME: transpose_a = #vhlo,
-@@ -2002,15 +2157,17 @@
- }
- 
- // CHECK-LABEL: "op_tuple"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_tuple(%arg0: tensor) -> tuple> {
-- // CHECK: "vhlo.tuple_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1>
-+ // CHECK: "vhlo.tuple_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1>
- %0 = "stablehlo.tuple"(%arg0) : (tensor) -> tuple>
- func.return %0 : tuple>
- }
- 
- // CHECK-LABEL: "op_unary_einsum"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_unary_einsum(%arg0: tensor<8x16xf32>) -> tensor<8xf32> {
-- // CHECK: "vhlo.unary_einsum_v1"(%arg0) <{
-+ // CHECK: "vhlo.unary_einsum_v1"(%[[ARG0]]) <{
- // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab->a">
- // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x!vhlo.f32_v1>
- %0 = "stablehlo.unary_einsum"(%arg0) {
-@@ -2020,22 +2177,25 @@
- }
- 
- // CHECK-LABEL: "op_uniform_dequantize"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_uniform_dequantize(%arg0: tensor>) -> tensor {
-- // CHECK: "vhlo.uniform_dequantize_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.uniform_dequantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.uniform_dequantize"(%arg0) : (tensor>) -> tensor
- func.return %0 : tensor
- }
- 
- // CHECK-LABEL: "op_uniform_quantize"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_uniform_quantize(%arg0: tensor) -> tensor> {
-- // CHECK: "vhlo.uniform_quantize_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1>
-+ // CHECK: "vhlo.uniform_quantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1>
- %0 = "stablehlo.uniform_quantize"(%arg0) : (tensor) -> tensor>
- func.return %0 : tensor>
- }
- 
- // CHECK-LABEL: "op_while"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_while(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.while_v1"(%arg0) ({
-+ // CHECK: "vhlo.while_v1"(%[[ARG0]]) ({
- // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1):
- // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> ()
- // CHECK-NEXT: }, {
-@@ -2053,8 +2213,9 @@
- }
- 
- // CHECK-LABEL: "op_xor"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_xor(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.xor_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.xor_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.xor"(%arg0, %arg1) : (tensor, tensor) -> tensor
- 
func.return %0 : tensor
- }
-@@ -2062,190 +2223,217 @@
- // ============ TYPES ============
- 
- // CHECK-LABEL: "type_i1"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i1(%arg0: tensor<i1>, %arg1: tensor<i1>) -> tensor<i1> {
-- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.i1_v1>, !vhlo.tensor_v1<!vhlo.i1_v1>) -> !vhlo.tensor_v1<!vhlo.i1_v1>
-+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.i1_v1>, !vhlo.tensor_v1<!vhlo.i1_v1>) -> !vhlo.tensor_v1<!vhlo.i1_v1>
- %0 = "stablehlo.and"(%arg0, %arg1) : (tensor<i1>, tensor<i1>) -> tensor<i1>
- func.return %0 : tensor<i1>
- }
- 
- // CHECK-LABEL: "type_i4"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i4(%arg0: tensor<i4>, %arg1: tensor<i4>) -> tensor<i4> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.i4_v1>, !vhlo.tensor_v1<!vhlo.i4_v1>) -> !vhlo.tensor_v1<!vhlo.i4_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.i4_v1>, !vhlo.tensor_v1<!vhlo.i4_v1>) -> !vhlo.tensor_v1<!vhlo.i4_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<i4>, tensor<i4>) -> tensor<i4>
- func.return %0 : tensor<i4>
- }
- 
- // CHECK-LABEL: "type_i8"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i8(%arg0: tensor<i8>, %arg1: tensor<i8>) -> tensor<i8> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.i8_v1>, !vhlo.tensor_v1<!vhlo.i8_v1>) -> !vhlo.tensor_v1<!vhlo.i8_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.i8_v1>, !vhlo.tensor_v1<!vhlo.i8_v1>) -> !vhlo.tensor_v1<!vhlo.i8_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<i8>, tensor<i8>) -> tensor<i8>
- func.return %0 : tensor<i8>
- }
- 
- // CHECK-LABEL: "type_i16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i16(%arg0: tensor<i16>, %arg1: tensor<i16>) -> tensor<i16> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.i16_v1>, !vhlo.tensor_v1<!vhlo.i16_v1>) -> !vhlo.tensor_v1<!vhlo.i16_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.i16_v1>, !vhlo.tensor_v1<!vhlo.i16_v1>) -> !vhlo.tensor_v1<!vhlo.i16_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<i16>, tensor<i16>) -> tensor<i16>
- func.return %0 : tensor<i16>
- }
- 
- // CHECK-LABEL: "type_i32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i32(%arg0: tensor<i32>, %arg1: tensor<i32>) -> tensor<i32> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.i32_v1>, !vhlo.tensor_v1<!vhlo.i32_v1>) -> !vhlo.tensor_v1<!vhlo.i32_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.i32_v1>, !vhlo.tensor_v1<!vhlo.i32_v1>) -> !vhlo.tensor_v1<!vhlo.i32_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> tensor<i32>
- func.return %0 : tensor<i32>
- }
- 
- // CHECK-LABEL: "type_i64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i64(%arg0: tensor<i64>, %arg1: tensor<i64>) -> tensor<i64> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.i64_v1>, !vhlo.tensor_v1<!vhlo.i64_v1>) -> !vhlo.tensor_v1<!vhlo.i64_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.i64_v1>, !vhlo.tensor_v1<!vhlo.i64_v1>) -> !vhlo.tensor_v1<!vhlo.i64_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<i64>, tensor<i64>) -> tensor<i64>
- func.return %0 : tensor<i64>
- }
- 
- // CHECK-LABEL: "type_ui4"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui4(%arg0: tensor<ui4>, %arg1: tensor<ui4>) -> tensor<ui4> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.ui4_v1>, !vhlo.tensor_v1<!vhlo.ui4_v1>) -> !vhlo.tensor_v1<!vhlo.ui4_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.ui4_v1>, !vhlo.tensor_v1<!vhlo.ui4_v1>) -> !vhlo.tensor_v1<!vhlo.ui4_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<ui4>, tensor<ui4>) -> tensor<ui4>
- func.return %0 : tensor<ui4>
- }
- 
- // CHECK-LABEL: "type_ui8"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui8(%arg0: tensor<ui8>, %arg1: tensor<ui8>) -> tensor<ui8> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.ui8_v1>, !vhlo.tensor_v1<!vhlo.ui8_v1>) -> !vhlo.tensor_v1<!vhlo.ui8_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.ui8_v1>, !vhlo.tensor_v1<!vhlo.ui8_v1>) -> !vhlo.tensor_v1<!vhlo.ui8_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<ui8>, tensor<ui8>) -> tensor<ui8>
- func.return %0 : tensor<ui8>
- }
- 
- // CHECK-LABEL: "type_ui16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui16(%arg0: tensor<ui16>, %arg1: tensor<ui16>) -> tensor<ui16> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.ui16_v1>, !vhlo.tensor_v1<!vhlo.ui16_v1>) -> !vhlo.tensor_v1<!vhlo.ui16_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.ui16_v1>, !vhlo.tensor_v1<!vhlo.ui16_v1>) -> !vhlo.tensor_v1<!vhlo.ui16_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<ui16>, tensor<ui16>) -> tensor<ui16>
- func.return %0 : tensor<ui16>
- }
- 
- // CHECK-LABEL: "type_ui32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui32(%arg0: tensor<ui32>, %arg1: tensor<ui32>) -> tensor<ui32> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.ui32_v1>, !vhlo.tensor_v1<!vhlo.ui32_v1>) -> !vhlo.tensor_v1<!vhlo.ui32_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.ui32_v1>, !vhlo.tensor_v1<!vhlo.ui32_v1>) -> !vhlo.tensor_v1<!vhlo.ui32_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<ui32>, tensor<ui32>) -> tensor<ui32>
- func.return %0 : tensor<ui32>
- }
- 
- // CHECK-LABEL: "type_ui64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui64(%arg0: tensor<ui64>, %arg1: tensor<ui64>) -> tensor<ui64> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.ui64_v1>, !vhlo.tensor_v1<!vhlo.ui64_v1>) -> !vhlo.tensor_v1<!vhlo.ui64_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.ui64_v1>, !vhlo.tensor_v1<!vhlo.ui64_v1>) -> !vhlo.tensor_v1<!vhlo.ui64_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<ui64>, tensor<ui64>) -> tensor<ui64>
- func.return %0 : tensor<ui64>
- }
- 
- // CHECK-LABEL: "type_f8E4M3FN"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E4M3FN(%arg0: tensor<f8E4M3FN>, %arg1: tensor<f8E4M3FN>) -> tensor<f8E4M3FN> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f8E4M3FN_v1>, !vhlo.tensor_v1<!vhlo.f8E4M3FN_v1>) -> !vhlo.tensor_v1<!vhlo.f8E4M3FN_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f8E4M3FN_v1>, !vhlo.tensor_v1<!vhlo.f8E4M3FN_v1>) -> !vhlo.tensor_v1<!vhlo.f8E4M3FN_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f8E4M3FN>, tensor<f8E4M3FN>) -> tensor<f8E4M3FN>
- func.return %0 : tensor<f8E4M3FN>
- }
- 
- // CHECK-LABEL: "type_f8E5M2"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E5M2(%arg0: tensor<f8E5M2>, %arg1: tensor<f8E5M2>) -> tensor<f8E5M2> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f8E5M2_v1>, !vhlo.tensor_v1<!vhlo.f8E5M2_v1>) -> !vhlo.tensor_v1<!vhlo.f8E5M2_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f8E5M2_v1>, !vhlo.tensor_v1<!vhlo.f8E5M2_v1>) -> !vhlo.tensor_v1<!vhlo.f8E5M2_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f8E5M2>, tensor<f8E5M2>) -> tensor<f8E5M2>
- func.return %0 : tensor<f8E5M2>
- }
- 
- // CHECK-LABEL: "type_f8E4M3FNUZ"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E4M3FNUZ(%arg0: tensor<f8E4M3FNUZ>, %arg1: tensor<f8E4M3FNUZ>) -> tensor<f8E4M3FNUZ> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f8E4M3FNUZ_v1>, !vhlo.tensor_v1<!vhlo.f8E4M3FNUZ_v1>) -> !vhlo.tensor_v1<!vhlo.f8E4M3FNUZ_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f8E4M3FNUZ_v1>, !vhlo.tensor_v1<!vhlo.f8E4M3FNUZ_v1>) -> !vhlo.tensor_v1<!vhlo.f8E4M3FNUZ_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f8E4M3FNUZ>, tensor<f8E4M3FNUZ>) -> tensor<f8E4M3FNUZ>
- func.return %0 : tensor<f8E4M3FNUZ>
- }
- 
- // CHECK-LABEL: "type_f8E4M3B11FNUZ"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E4M3B11FNUZ(%arg0: tensor<f8E4M3B11FNUZ>, %arg1: tensor<f8E4M3B11FNUZ>) -> tensor<f8E4M3B11FNUZ> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f8E4M3B11FNUZ_v1>, !vhlo.tensor_v1<!vhlo.f8E4M3B11FNUZ_v1>) -> !vhlo.tensor_v1<!vhlo.f8E4M3B11FNUZ_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f8E4M3B11FNUZ_v1>, !vhlo.tensor_v1<!vhlo.f8E4M3B11FNUZ_v1>) -> !vhlo.tensor_v1<!vhlo.f8E4M3B11FNUZ_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f8E4M3B11FNUZ>, tensor<f8E4M3B11FNUZ>) -> tensor<f8E4M3B11FNUZ>
- func.return %0 : tensor<f8E4M3B11FNUZ>
- }
- 
- // CHECK-LABEL: "type_f8E5M2FNUZ"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E5M2FNUZ(%arg0: tensor<f8E5M2FNUZ>, %arg1: tensor<f8E5M2FNUZ>) -> tensor<f8E5M2FNUZ> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f8E5M2FNUZ_v1>, !vhlo.tensor_v1<!vhlo.f8E5M2FNUZ_v1>) -> !vhlo.tensor_v1<!vhlo.f8E5M2FNUZ_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f8E5M2FNUZ_v1>, !vhlo.tensor_v1<!vhlo.f8E5M2FNUZ_v1>) -> !vhlo.tensor_v1<!vhlo.f8E5M2FNUZ_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f8E5M2FNUZ>, tensor<f8E5M2FNUZ>) -> tensor<f8E5M2FNUZ>
- func.return %0 : tensor<f8E5M2FNUZ>
- }
- 
- // CHECK-LABEL: "type_bf16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_bf16(%arg0: tensor<bf16>, %arg1: tensor<bf16>) -> tensor<bf16> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.bf16_v1>, !vhlo.tensor_v1<!vhlo.bf16_v1>) -> !vhlo.tensor_v1<!vhlo.bf16_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.bf16_v1>, !vhlo.tensor_v1<!vhlo.bf16_v1>) -> !vhlo.tensor_v1<!vhlo.bf16_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<bf16>, tensor<bf16>) -> tensor<bf16>
- func.return %0 : tensor<bf16>
- }
- 
- // CHECK-LABEL: "type_f16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f16(%arg0: tensor<f16>, %arg1: tensor<f16>) -> tensor<f16> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f16_v1>, !vhlo.tensor_v1<!vhlo.f16_v1>) -> !vhlo.tensor_v1<!vhlo.f16_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f16_v1>, !vhlo.tensor_v1<!vhlo.f16_v1>) -> !vhlo.tensor_v1<!vhlo.f16_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f16>, tensor<f16>) -> tensor<f16>
- func.return %0 : tensor<f16>
- }
- 
- // CHECK-LABEL: "type_f32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f32(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<f32> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f32_v1>, !vhlo.tensor_v1<!vhlo.f32_v1>) -> !vhlo.tensor_v1<!vhlo.f32_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f32_v1>, !vhlo.tensor_v1<!vhlo.f32_v1>) -> !vhlo.tensor_v1<!vhlo.f32_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
- func.return %0 : tensor<f32>
- }
- 
- // CHECK-LABEL: "type_f64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f64(%arg0: tensor<f64>, %arg1: tensor<f64>) -> tensor<f64> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f64_v1>, !vhlo.tensor_v1<!vhlo.f64_v1>) -> !vhlo.tensor_v1<!vhlo.f64_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f64_v1>, !vhlo.tensor_v1<!vhlo.f64_v1>) -> !vhlo.tensor_v1<!vhlo.f64_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f64>, tensor<f64>) -> tensor<f64>
- func.return %0 : tensor<f64>
- }
- 
- // CHECK-LABEL: "type_complex_f32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_complex_f32(%arg0: tensor<complex<f32>>, %arg1: tensor<complex<f32>>) -> tensor<complex<f32>> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f32_v1>>, !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f32_v1>>) -> !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f32_v1>>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f32_v1>>, !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f32_v1>>) -> !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f32_v1>>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<complex<f32>>, tensor<complex<f32>>) -> tensor<complex<f32>>
- func.return %0 : tensor<complex<f32>>
- }
- 
- // CHECK-LABEL: "type_complex_f64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_complex_f64(%arg0: tensor<complex<f64>>, %arg1: tensor<complex<f64>>) -> tensor<complex<f64>> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f64_v1>>, !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f64_v1>>) -> !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f64_v1>>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f64_v1>>, !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f64_v1>>) -> !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f64_v1>>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<complex<f64>>, tensor<complex<f64>>) -> tensor<complex<f64>>
- func.return %0 : tensor<complex<f64>>
- }
- 
- // CHECK-LABEL: "type_dynamism_ranked"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_dynamism_ranked(%arg0: tensor<?xf32>) -> tensor<?xf32> {
-- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1<?x!vhlo.f32_v1>) -> !vhlo.tensor_v1<?x!vhlo.f32_v1>
-+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1<?x!vhlo.f32_v1>) -> !vhlo.tensor_v1<?x!vhlo.f32_v1>
- %0 = "stablehlo.abs"(%arg0) : (tensor<?xf32>) -> tensor<?xf32>
- func.return %0 : tensor<?xf32>
- }
- 
- // CHECK-LABEL: 
"type_quantization" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_quantization(%arg0: tensor>, %arg1: tensor>) -> tensor> { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor> - func.return %0 : tensor> - } - - // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>> - // CHECK-LABEL: "type_token_callee" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_token_callee(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.token_v1) -> () -+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> () - return %arg0 : !stablehlo.token - } - - // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>> - // CHECK-LABEL: "type_token_caller" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_token_caller(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.call_v1"(%arg0) <{callee = #vhlo.string_v1<"type_token_callee">} -+ // CHECK: "vhlo.call_v1"(%[[ARG0]]) <{callee = #vhlo.string_v1<"type_token_callee">} - // CHECK-SAME: (!vhlo.token_v1) -> !vhlo.token_v1 - %0 = func.call @type_token_callee(%arg0) : (!stablehlo.token) -> !stablehlo.token - return %0 : !stablehlo.token - } - - // CHECK-LABEL: "type_tuple" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_tuple(%arg0: tuple>) -> tuple { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo" -diff --ruN a/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_15_0.mlir b/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_15_0.mlir ---- stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_15_0.mlir -+++ stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_15_0.mlir -@@ -13,6 +13,7 @@ - // ============ ATTRIBUTES ============ - - // CHECK-LABEL: "attr_comparison_direction_eq" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_eq(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -22,6 +23,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_ne" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_ne(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -31,6 +33,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_ge" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_ge(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -40,6 +43,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_gt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_gt(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -49,6 +53,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_le" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_le(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -58,6 +63,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_lt" -+// CHECK-NEXT: 
(%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_lt(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -67,6 +73,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_notype" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_notype(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo -@@ -76,6 +83,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_float" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_float(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -86,6 +94,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_totalorder" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_totalorder(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -96,6 +105,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_signed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_signed(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -106,6 +116,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_unsigned" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_unsigned(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -118,6 +129,7 @@ - // ConvDimensionNumbers aka #stablehlo.conv is covered below. - - // CHECK-LABEL: "attr_custom_call_api_version_unspecified" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_unspecified(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -128,6 +140,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_original" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_original(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -138,6 +151,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_status_returning" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_status_returning(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -148,6 +162,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_status_returning_unified" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_status_returning_unified(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -166,6 +181,7 @@ - // DotDimensionNumbers aka #stablehlo.dot is covered below. 
- - // CHECK-LABEL: "attr_fft_type_fft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -176,6 +192,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_ifft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_ifft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -186,6 +203,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_rfft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_rfft(%arg0: tensor<16xf32>) -> tensor<9xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -196,6 +214,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_irfft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_irfft(%arg0: tensor<9xcomplex>) -> tensor<16xf32> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -208,6 +227,7 @@ - // GatherDimensionNumbers aka #stablehlo.gather is covered below. - - // CHECK-LABEL: "attr_precision_config_default" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_default(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -216,6 +236,7 @@ - } - - // CHECK-LABEL: "attr_precision_config_high" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_high(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -225,6 +246,7 @@ - } - - // CHECK-LABEL: "attr_precision_config_highest" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_highest(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -234,6 +256,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_default" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_default(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -243,6 +266,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_three_fry" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_three_fry(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -252,6 +276,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_philox" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_philox(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -261,6 +286,7 @@ - } - - // CHECK-LABEL: "attr_rng_distribution_uniform" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @attr_rng_distribution_uniform(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { - %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) { - // CHECK: rng_distribution = #vhlo -@@ -270,6 +296,7 @@ - } - - // CHECK-LABEL: "attr_rng_distribution_normal" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @attr_rng_distribution_normal(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { - %0 = 
"stablehlo.rng"(%arg0, %arg1, %arg2) { - // CHECK: rng_distribution = #vhlo -@@ -281,6 +308,7 @@ - // ScatterDimensionNumbers aka #stablehlo.scatter is covered below. - - // CHECK-LABEL: "attr_transpose_no_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_no_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -293,6 +321,7 @@ - } - - // CHECK-LABEL: "attr_transpose_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -305,6 +334,7 @@ - } - - // CHECK-LABEL: "attr_transpose_adjoint" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_adjoint(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -319,10 +349,9 @@ - // TypeExtensionsAttr aka #stablehlo.type_extensions is covered below. - - // CHECK-LABEL: "attr_type_extensions_bounds" --func.func @attr_type_extensions_bounds( -- %arg0: tensor>) -- -> tensor> { -- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1>) -> () -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) -+func.func @attr_type_extensions_bounds(%arg0: tensor>) -> tensor> { -+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> () - func.return %arg0 : tensor> - } - -@@ -330,8 +359,9 @@ - // ============ DEFAULTS ============ - - // CHECK-LABEL: "default_all_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.all_gather_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{ - // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64> - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -345,8 +375,9 @@ - } - - // CHECK-LABEL: "default_all_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_reduce(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.all_reduce_v1"(%arg0) -+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]]) - // CHECK-SAME: <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -368,8 +399,9 @@ - } - - // CHECK-LABEL: "default_all_to_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> { -- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>, -@@ -386,8 +418,9 @@ - } - - // CHECK-LABEL: "default_cholesky" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> { -- // CHECK: "vhlo.cholesky_v1"(%arg0) <{ -+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{ - // CHECK-SAME: lower = #vhlo.bool_v1 - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1> - %0 = "stablehlo.cholesky"(%arg0) : (tensor<1x16x16xf32>) -> tensor<1x16x16xf32> -@@ -395,8 +428,9 @@ - } - - // CHECK-LABEL: 
"default_collective_permute" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: "vhlo.collective_permute_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -407,8 +441,9 @@ - } - - // CHECK-LABEL: "default_compare" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_compare(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: compare_type = #vhlo, - // CHECK-SAME: comparison_direction = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -419,8 +454,9 @@ - } - - // CHECK-LABEL: "default_convolution" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x6x6x16xf32> { -- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -448,8 +484,9 @@ - } - - // CHECK-LABEL: "default_custom_call" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_custom_call(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.custom_call_v1"(%arg0) <{ -+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{ - // CHECK-SAME: api_version = #vhlo, - // CHECK-SAME: backend_config = #vhlo.string_v1<"">, - // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">, -@@ -466,8 +503,9 @@ - } - - // CHECK-LABEL: "default_dot_general" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> { -- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, -@@ -486,8 +524,9 @@ - } - - // CHECK-LABEL: "default_dot" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.dot"(%arg0, %arg1) : (tensor<8x16xf32>, tensor<16x8xf32>) -> tensor<8x8xf32> -@@ -495,8 +534,9 @@ - } - - // CHECK-LABEL: "default_dynamic_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : 
tensor<2xi64>>, - // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>>, - // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>> -@@ -508,8 +548,9 @@ - } - - // CHECK-LABEL: "default_dynamic_conv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> { -- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -537,8 +578,9 @@ - } - - // CHECK-LABEL: "default_dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> { -- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -564,15 +606,16 @@ - // CHECK-SAME: sym_name = #vhlo.string_v1<"default_func">, - // CHECK-SAME: sym_visibility = #vhlo.string_v1<""> - // CHECK-SAME: }> ({ -- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1): -- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1): -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : () -> () - func.return %arg0 : tensor - } - - // CHECK-LABEL: "dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> { -- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -593,8 +636,9 @@ - } - - // CHECK-LABEL: "default_infeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.infeed_v1"(%arg0) <{ -+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{ - // CHECK-SAME: infeed_config = #vhlo.string_v1<"">, - // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[]> - // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) -@@ -603,8 +647,9 @@ - } - - // CHECK-LABEL: "default_outfeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: outfeed_config = #vhlo.string_v1<""> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.outfeed"(%arg0, %arg1) : (tensor, !stablehlo.token) -> !stablehlo.token -@@ -612,8 +657,9 @@ - } - - // CHECK-LABEL: "default_recv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_recv(%arg0: !stablehlo.token) -> (tensor, 
!stablehlo.token) { -- // CHECK: "vhlo.recv_v1"(%arg0) <{ -+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -625,8 +671,9 @@ - } - - // CHECK-LABEL: "default_send" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -638,8 +685,9 @@ - } - - // CHECK-LABEL: "default_reduce_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64> -@@ -661,8 +709,9 @@ - } - - // CHECK-LABEL: "default_reduce_window" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x16x30x7xf32> { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, -@@ -684,8 +733,9 @@ - } - - // CHECK-LABEL: "default_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> { -- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, - // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>, -@@ -713,8 +763,9 @@ - } - - // CHECK-LABEL: "default_select_and_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<10x23x23x64xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> { -- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>> -@@ -742,8 +793,9 @@ - } - - // CHECK-LABEL: "default_sort" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.sort_v1"(%arg0) <{ -+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<-1 : i64> - // CHECK-SAME: is_stable = #vhlo.bool_v1 - // CHECK-SAME: }> ({ -@@ -762,29 +814,33 @@ - // ============ OPS ============ - - // CHECK-LABEL: "op_abs" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func 
@op_abs(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_add" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_add(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_after_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_after_all(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.after_all_v1"(%arg0) : (!vhlo.token_v1) -> !vhlo.token_v1 -+ // CHECK: "vhlo.after_all_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.after_all"(%arg0) : (!stablehlo.token) -> !stablehlo.token - func.return %0 : !stablehlo.token - } - - // CHECK-LABEL: "op_all_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.all_gather_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{ - // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64> - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -800,8 +856,9 @@ - } - - // CHECK-LABEL: "op_all_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_reduce(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.all_reduce_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: use_global_device_ids = #vhlo.bool_v1 -@@ -823,8 +880,9 @@ - } - - // CHECK-LABEL: "op_all_to_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> { -- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>, -@@ -842,22 +900,25 @@ - } - - // CHECK-LABEL: "op_and" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_and(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_atan2" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_atan2(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.atan2_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.atan2_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.atan2"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_batch_norm_grad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, 
%[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_batch_norm_grad(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16x16x16x16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { -- // CHECK: "vhlo.batch_norm_grad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ -+ // CHECK: "vhlo.batch_norm_grad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -@@ -869,8 +930,9 @@ - } - - // CHECK-LABEL: "op_batch_norm_inference" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_batch_norm_inference(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16xf32>) -> tensor<16x16x16x16xf32> { -- // CHECK: "vhlo.batch_norm_inference_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ -+ // CHECK: "vhlo.batch_norm_inference_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1> -@@ -882,8 +944,9 @@ - } - - // CHECK-LABEL: "op_batch_norm_training" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_batch_norm_training(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { -- // CHECK: "vhlo.batch_norm_training_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.batch_norm_training_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -@@ -895,15 +958,17 @@ - } - - // CHECK-LABEL: "op_bitcast_convert" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_bitcast_convert(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.bitcast_convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.bitcast_convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.bitcast_convert"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_broadcast_in_dim(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.broadcast_in_dim_v1"(%arg0) <{ -+ // CHECK: "vhlo.broadcast_in_dim_v1"(%[[ARG0]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> 
!vhlo.tensor_v1<16x16x!vhlo.f32_v1> - %0 = "stablehlo.broadcast_in_dim"(%arg0) { -@@ -913,8 +978,9 @@ - } - - // CHECK-LABEL: "op_broadcast" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_broadcast(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.broadcast_v1"(%arg0) <{ -+ // CHECK: "vhlo.broadcast_v1"(%[[ARG0]]) <{ - // CHECK-SAME: broadcast_sizes = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1> - %0 = "stablehlo.broadcast"(%arg0) { -@@ -924,9 +990,10 @@ - } - - // CHECK-LABEL: "op_case" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_case(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.case_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.case"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -935,22 +1002,25 @@ - } - - // CHECK-LABEL: "op_cbrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cbrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cbrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.cbrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cbrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_ceil" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_ceil(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.ceil_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.ceil_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.ceil"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_cholesky" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> { -- // CHECK: "vhlo.cholesky_v1"(%arg0) <{ -+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{ - // CHECK-SAME: lower = #vhlo.bool_v1 - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1> - %0 = "stablehlo.cholesky"(%arg0) { -@@ -960,22 +1030,25 @@ - } - - // CHECK-LABEL: "op_clamp" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_clamp(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.clamp_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.clamp_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.clamp"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_count_leading_zeros" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_count_leading_zeros(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.count_leading_zeros_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.count_leading_zeros_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.count_leading_zeros"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_collective_permute" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: "vhlo.collective_permute_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{ - // CHECK-SAME: 
channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -987,8 +1060,9 @@ - } - - // CHECK-LABEL: "op_compare" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_compare(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: compare_type = #vhlo, - // CHECK-SAME: comparison_direction = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -1000,15 +1074,17 @@ - } - - // CHECK-LABEL: "op_complex" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_complex(%arg0: tensor, %arg1: tensor) -> tensor> { -- // CHECK: "vhlo.complex_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.complex_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1> - %0 = "stablehlo.complex"(%arg0, %arg1) : (tensor, tensor) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "op_concatenate" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_concatenate(%arg0: tensor<8xf32>, %arg1: tensor<8xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.concatenate_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.concatenate_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x!vhlo.f32_v1>, !vhlo.tensor_v1<8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.concatenate"(%arg0, %arg1) { -@@ -1018,6 +1094,7 @@ - } - - // CHECK-LABEL: "op_constant" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_constant(%arg0: tensor) -> tensor { - // CHECK: "vhlo.constant_v1"() <{ - // CHECK-SAME: value = #vhlo.tensor_v1 : tensor> -@@ -1029,15 +1106,17 @@ - } - - // CHECK-LABEL: "op_convert" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_convert(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.convert"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_convolution" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x7x7x16xf32> { -- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -1071,8 +1150,9 @@ - } - - // CHECK-LABEL: "op_cosine" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cosine(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cosine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.cosine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cosine"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } -@@ -1085,8 +1165,9 @@ - } - - // CHECK-LABEL: "op_cross_replica_sum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cross_replica_sum(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cross-replica-sum_v1"(%arg0) <{ -+ // CHECK: "vhlo.cross-replica-sum_v1"(%[[ARG0]]) <{ - // 
CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cross-replica-sum"(%arg0) { -@@ -1096,8 +1177,9 @@ - } - - // CHECK-LABEL: "op_custom_call" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_custom_call(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.custom_call_v1"(%arg0) <{ -+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{ - // CHECK-SAME: api_version = #vhlo, - // CHECK-SAME: backend_config = #vhlo.string_v1<"\08\03\1A\02">, - // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">, -@@ -1128,15 +1210,17 @@ - } - - // CHECK-LABEL: "op_divide" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_divide(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.divide_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.divide_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.divide"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dot_general" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> { -- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, -@@ -1156,8 +1240,9 @@ - } - - // CHECK-LABEL: "op_dot" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.dot"(%arg0, %arg1) { -@@ -1167,8 +1252,9 @@ - } - - // CHECK-LABEL: "op_dynamic_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1182,8 +1268,9 @@ - } - - // CHECK-LABEL: "op_dynamic_conv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> { -- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -1217,8 +1304,9 @@ - } - - // CHECK-LABEL: "op_dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - 
func.func @op_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> { -- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -1238,8 +1326,9 @@ - } - - // CHECK-LABEL: "op_dynamic_iota" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_dynamic_iota(%arg0: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_iota_v1"(%arg0) <{ -+ // CHECK: "vhlo.dynamic_iota_v1"(%[[ARG0]]) <{ - // CHECK-SAME: iota_dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_iota"(%arg0) { -@@ -1249,22 +1338,25 @@ - } - - // CHECK-LABEL: "op_dynamic_pad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_dynamic_pad(%arg0: tensor, %arg1: tensor, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>, %arg4: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_pad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.dynamic_pad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_pad"(%arg0, %arg1, %arg2, %arg3, %arg4) : (tensor, tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dynamic_reshape" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_reshape(%arg0: tensor<16xf32>, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_reshape_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.dynamic_reshape_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_reshape"(%arg0, %arg1) : (tensor<16xf32>, tensor<2xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dynamic_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_slice(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor<4xf32> { -- // CHECK: "vhlo.dynamic_slice_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_slice_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: slice_sizes = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<4x!vhlo.f32_v1> - %0 = "stablehlo.dynamic_slice"(%arg0, %arg1) { -@@ -1274,15 +1366,17 @@ - } - - // CHECK-LABEL: "op_dynamic_update_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_update_slice(%arg0: tensor<16xf32>, %arg1: tensor<4xf32>, %arg2: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.dynamic_update_slice_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> -+ // CHECK: "vhlo.dynamic_update_slice_v1"(%[[ARG0]], 
%[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.dynamic_update_slice"(%arg0, %arg1, %arg2) : (tensor<16xf32>, tensor<4xf32>, tensor) -> tensor<16xf32> - func.return %0 : tensor<16xf32> - } - - // CHECK-LABEL: "op_einsum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_einsum(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.einsum_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.einsum_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab,bc->ac"> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.einsum"(%arg0, %arg1) { -@@ -1292,22 +1386,25 @@ - } - - // CHECK-LABEL: "op_exponential_minus_one" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_exponential_minus_one(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.exponential_minus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.exponential_minus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.exponential_minus_one"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_exponential" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_exponential(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.exponential_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.exponential_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.exponential"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_fft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { -- // CHECK: "vhlo.fft_v1"(%arg0) <{ -+ // CHECK: "vhlo.fft_v1"(%[[ARG0]]) <{ - // CHECK-SAME: fft_length = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: fft_type = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.complex_v1>) -> !vhlo.tensor_v1<16x!vhlo.complex_v1> -@@ -1319,8 +1416,9 @@ - } - - // CHECK-LABEL: "op_floor" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_floor(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.floor_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.floor_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.floor"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } -@@ -1333,16 +1431,17 @@ - // CHECK-SAME: sym_name = #vhlo.string_v1<"op_func">, - // CHECK-SAME: sym_visibility = #vhlo.string_v1<"private"> - // CHECK-SAME: }> ({ -- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1): -- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1): -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : () -> () - - func.return %arg0 : tensor - } - - // CHECK-LABEL: "op_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> { -- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -1364,8 +1463,9 @@ - } - - // CHECK-LABEL: "op_get_dimension_size" -+// CHECK-NEXT: 
(%[[ARG0:.*]]: {{.*}}) - func.func @op_get_dimension_size(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.get_dimension_size_v1"(%arg0) <{ -+ // CHECK: "vhlo.get_dimension_size_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.get_dimension_size"(%arg0) { -@@ -1375,8 +1475,9 @@ - } - - // CHECK-LABEL: "op_get_tuple_element" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_get_tuple_element(%arg0: tuple, tensor>) -> tensor { -- // CHECK: "vhlo.get_tuple_element_v1"(%arg0) <{ -+ // CHECK: "vhlo.get_tuple_element_v1"(%[[ARG0]]) <{ - // CHECK-SAME: index = #vhlo.integer_v1<0 : i32> - // CHECK-SAME: }> : (!vhlo.tuple_v1, !vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.get_tuple_element"(%arg0) { -@@ -1386,11 +1487,12 @@ - } - - // CHECK-LABEL: "op_if" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_if(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.if_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.if_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }, { -- // CHECK-NEXT: "vhlo.return_v1"(%arg2) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG2]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.if"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -1401,15 +1503,17 @@ - } - - // CHECK-LABEL: "op_imag" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_imag(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.imag_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.imag_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.imag"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_infeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.infeed_v1"(%arg0) <{ -+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{ - // CHECK-SAME: infeed_config = #vhlo.string_v1<"foo">, - // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[#vhlo.array_v1<[]>]> - // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) -@@ -1432,36 +1536,41 @@ - } - - // CHECK-LABEL: "op_is_finite" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_is_finite(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.is_finite_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.is_finite_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.is_finite"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_log" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_log(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.log_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.log_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.log"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_log_plus_one" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_log_plus_one(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.log_plus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.log_plus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.log_plus_one"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_logistic" -+// 
CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_logistic(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.logistic_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.logistic_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.logistic"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_map"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_map(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.map_v1"(%arg0) <{
-+ // CHECK: "vhlo.map_v1"(%[[ARG0]]) <{
- // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>>
- // CHECK-SAME: }> ({
- // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1):
-@@ -1479,57 +1588,65 @@
- }
-
- // CHECK-LABEL: "op_maximum"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_maximum(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.maximum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.maximum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.maximum"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_minimum"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_minimum(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.minimum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.minimum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.minimum"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_multiply"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_multiply(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.multiply_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.multiply_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.multiply"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_negate"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_negate(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.negate_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.negate_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.negate"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_not"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_not(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.not_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.not_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.not"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_optimization_barrier"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_optimization_barrier(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.optimization_barrier_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.optimization_barrier_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.optimization_barrier"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_or"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_or(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.or_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: 
"vhlo.or_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.or"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_outfeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: outfeed_config = #vhlo.string_v1<"foo"> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.outfeed"(%arg0, %arg1) { -@@ -1539,8 +1656,9 @@ - } - - // CHECK-LABEL: "op_pad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_pad(%arg0: tensor<8xf32>, %arg1: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.pad_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.pad_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: edge_padding_high = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: edge_padding_low = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: interior_padding = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1554,36 +1672,41 @@ - } - - // CHECK-LABEL: "op_popcnt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_popcnt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.popcnt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.popcnt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.popcnt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_power" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_power(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.power_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.power_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.power"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_real_dynamic_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}) - func.func @op_real_dynamic_slice(%arg0: tensor, %arg1: tensor<1xindex>, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.real_dynamic_slice_v1"(%arg0, %arg1, %arg2, %arg3) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.real_dynamic_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.real_dynamic_slice"(%arg0, %arg1, %arg2, %arg3) : (tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_real" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_real(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.real_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.real_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.real"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_recv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.recv_v1"(%arg0) <{ -+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: 
channel_type = #vhlo.integer_v1<3 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -1596,8 +1719,9 @@ - } - - // CHECK-LABEL: "op_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_reduce(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.reduce_v1"(%arg0, %arg1) -+ // CHECK: "vhlo.reduce_v1"(%[[ARG0]], %[[ARG1]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -1612,8 +1736,9 @@ - } - - // CHECK-LABEL: "op_reduce_precision" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reduce_precision(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.reduce_precision_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_precision_v1"(%[[ARG0]]) <{ - // CHECK-SAME: exponent_bits = #vhlo.integer_v1<8 : i32> - // CHECK-SAME: mantissa_bits = #vhlo.integer_v1<10 : i32> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -1625,8 +1750,9 @@ - } - - // CHECK-LABEL: "op_reduce_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64> -@@ -1650,8 +1776,9 @@ - } - - // CHECK-LABEL: "op_reduce_window" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x9x16x7xf32> { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, -@@ -1677,8 +1804,9 @@ - } - - // CHECK-LABEL: "op_remainder" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_remainder(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.remainder_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.remainder_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.remainder"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } -@@ -1698,16 +1826,18 @@ - } - - // CHECK-LABEL: "op_reshape" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reshape(%arg0: tensor<16xf32>) -> tensor<4x4xf32> { -- // CHECK: "vhlo.reshape_v1"(%arg0) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1> -+ // CHECK: "vhlo.reshape_v1"(%[[ARG0]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1> - %0 = "stablehlo.reshape"(%arg0) : (tensor<16xf32>) -> tensor<4x4xf32> - func.return %0 : tensor<4x4xf32> - } - - // CHECK-LABEL: "op_return" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_return(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.case_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) 
-> !vhlo.tensor_v1 - %0 = "stablehlo.case"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -1716,8 +1846,9 @@ - } - - // CHECK-LABEL: "op_reverse" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reverse(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reverse_v1"(%arg0) <{ -+ // CHECK: "vhlo.reverse_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.reverse"(%arg0) { -@@ -1727,8 +1858,9 @@ - } - - // CHECK-LABEL: "op_rng_bit_generator" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_rng_bit_generator(%arg0: tensor) -> (tensor, tensor) { -- // CHECK: "vhlo.rng_bit_generator_v1"(%arg0) <{ -+ // CHECK: "vhlo.rng_bit_generator_v1"(%[[ARG0]]) <{ - // CHECK-SAME: rng_algorithm = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> (!vhlo.tensor_v1, !vhlo.tensor_v1) - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { -@@ -1738,8 +1870,9 @@ - } - - // CHECK-LABEL: "op_rng" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_rng(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { -- // CHECK: "vhlo.rng_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.rng_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: rng_distribution = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<0x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) { -@@ -1749,29 +1882,33 @@ - } - - // CHECK-LABEL: "op_round_nearest_afz" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_round_nearest_afz(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.round_nearest_afz_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.round_nearest_afz_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.round_nearest_afz"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_round_nearest_even" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_round_nearest_even(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.round_nearest_even_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.round_nearest_even_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.round_nearest_even"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_rsqrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_rsqrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.rsqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.rsqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.rsqrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> { -- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, - // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>, -@@ -1801,8 +1938,9 @@ - } - - // CHECK-LABEL: "op_select_and_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: 
tensor<12x13x13x66xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> { -- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>> -@@ -1832,15 +1970,17 @@ - } - - // CHECK-LABEL: "op_select" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_select(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.select_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.select_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.select"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_send" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -1853,8 +1993,9 @@ - } - - // CHECK-LABEL: "op_set_dimension_size" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_set_dimension_size(%arg0: tensor, %arg1: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.set_dimension_size_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.set_dimension_size_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.set_dimension_size"(%arg0, %arg1) { -@@ -1864,43 +2005,49 @@ - } - - // CHECK-LABEL: "op_shift_left" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_left(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.shift_left_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_left_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_left"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_shift_right_arithmetic" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_right_arithmetic(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.shift_right_arithmetic_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_right_arithmetic_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_right_arithmetic"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_shift_right_logical" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_right_logical(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.shift_right_logical_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_right_logical_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_right_logical"(%arg0, %arg1) : (tensor, tensor) -> 
tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_sign"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sign(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.sign_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.sign_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.sign"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_sine"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sine(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.sine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.sine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.sine"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_slice"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_slice(%arg0: tensor<16xf32>) -> tensor<4xf32> {
-- // CHECK: "vhlo.slice_v1"(%arg0) <{
-+ // CHECK: "vhlo.slice_v1"(%[[ARG0]]) <{
- // CHECK-SAME: limit_indices = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: start_indices = #vhlo.tensor_v1 : tensor<1xi64>>,
- // CHECK-SAME: strides = #vhlo.tensor_v1 : tensor<1xi64>>
-@@ -1914,8 +2061,9 @@
- }
-
- // CHECK-LABEL: "op_sort"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.sort_v1"(%arg0) <{
-+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{
- // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: is_stable = #vhlo.bool_v1
- // CHECK-SAME: }> ({
-@@ -1935,29 +2083,33 @@
- }
-
- // CHECK-LABEL: "op_sqrt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_sqrt(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.sqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.sqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.sqrt"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_subtract"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_subtract(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.subtract_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.subtract_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.subtract"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_tanh"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_tanh(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.tanh_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.tanh_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.tanh"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_torch_index_select"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_torch_index_select(%arg0: tensor<5x1x5xf32>, %arg1: tensor<2xi32>) -> tensor<2x1x5xf32> {
-- // CHECK: "vhlo.torch_index_select_v1"(%arg0, %arg1) <{
-+ // CHECK: "vhlo.torch_index_select_v1"(%[[ARG0]], %[[ARG1]]) <{
- // CHECK-SAME: batch_dims = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: dim = #vhlo.integer_v1<0 : i64>
- // CHECK-SAME: }> : (!vhlo.tensor_v1<5x1x5x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.i32_v1>) -> !vhlo.tensor_v1<2x1x5x!vhlo.f32_v1>
- %0 = "stablehlo.torch_index_select"(%arg0, %arg1) {
-@@ -1969,8 +2121,9 @@
- }
-
- // CHECK-LABEL: "op_trace"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_trace(%arg0: tensor) {
-- // CHECK: "vhlo.trace_v1"(%arg0) <{
-+ // CHECK: "vhlo.trace_v1"(%[[ARG0]]) <{
- // CHECK-SAME: tag = 
#vhlo.string_v1<"foo"> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> () - "stablehlo.trace"(%arg0) { -@@ -1980,8 +2133,9 @@ - } - - // CHECK-LABEL: "op_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_transpose(%arg0: tensor<16x8xf32>) -> tensor<8x16xf32> { -- // CHECK: "vhlo.transpose_v1"(%arg0) <{ -+ // CHECK: "vhlo.transpose_v1"(%[[ARG0]]) <{ - // CHECK-SAME: permutation = #vhlo.tensor_v1 : tensor<2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x16x!vhlo.f32_v1> - %0 = "stablehlo.transpose"(%arg0) { -@@ -1991,8 +2145,9 @@ - } - - // CHECK-LABEL: "op_triangular_solve" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_triangular_solve(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.triangular_solve_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.triangular_solve_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: left_side = #vhlo.bool_v1, - // CHECK-SAME: lower = #vhlo.bool_v1, - // CHECK-SAME: transpose_a = #vhlo, -@@ -2008,15 +2163,17 @@ - } - - // CHECK-LABEL: "op_tuple" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_tuple(%arg0: tensor) -> tuple> { -- // CHECK: "vhlo.tuple_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1> -+ // CHECK: "vhlo.tuple_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1> - %0 = "stablehlo.tuple"(%arg0) : (tensor) -> tuple> - func.return %0 : tuple> - } - - // CHECK-LABEL: "op_unary_einsum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_unary_einsum(%arg0: tensor<8x16xf32>) -> tensor<8xf32> { -- // CHECK: "vhlo.unary_einsum_v1"(%arg0) <{ -+ // CHECK: "vhlo.unary_einsum_v1"(%[[ARG0]]) <{ - // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab->a"> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x!vhlo.f32_v1> - %0 = "stablehlo.unary_einsum"(%arg0) { -@@ -2026,22 +2183,25 @@ - } - - // CHECK-LABEL: "op_uniform_dequantize" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_uniform_dequantize(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.uniform_dequantize_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.uniform_dequantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.uniform_dequantize"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_uniform_quantize" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_uniform_quantize(%arg0: tensor) -> tensor> { -- // CHECK: "vhlo.uniform_quantize_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.uniform_quantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1> - %0 = "stablehlo.uniform_quantize"(%arg0) : (tensor) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "op_while" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_while(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.while_v1"(%arg0) ({ -+ // CHECK: "vhlo.while_v1"(%[[ARG0]]) ({ - // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1): - // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }, { -@@ -2059,8 +2219,9 @@ - } - - // CHECK-LABEL: "op_xor" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_xor(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.xor_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.xor_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.xor"(%arg0, %arg1) : (tensor, tensor) -> tensor - 
func.return %0 : tensor
- }
-@@ -2068,190 +2229,217 @@
- // ============ TYPES ============
-
- // CHECK-LABEL: "type_i1"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i1(%arg0: tensor<i1>, %arg1: tensor<i1>) -> tensor<i1> {
-- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.i1_v1>, !vhlo.tensor_v1<!vhlo.i1_v1>) -> !vhlo.tensor_v1<!vhlo.i1_v1>
-+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.i1_v1>, !vhlo.tensor_v1<!vhlo.i1_v1>) -> !vhlo.tensor_v1<!vhlo.i1_v1>
- %0 = "stablehlo.and"(%arg0, %arg1) : (tensor<i1>, tensor<i1>) -> tensor<i1>
- func.return %0 : tensor<i1>
- }
-
- // CHECK-LABEL: "type_i4"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i4(%arg0: tensor<i4>, %arg1: tensor<i4>) -> tensor<i4> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.i4_v1>, !vhlo.tensor_v1<!vhlo.i4_v1>) -> !vhlo.tensor_v1<!vhlo.i4_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.i4_v1>, !vhlo.tensor_v1<!vhlo.i4_v1>) -> !vhlo.tensor_v1<!vhlo.i4_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<i4>, tensor<i4>) -> tensor<i4>
- func.return %0 : tensor<i4>
- }
-
- // CHECK-LABEL: "type_i8"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i8(%arg0: tensor<i8>, %arg1: tensor<i8>) -> tensor<i8> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.i8_v1>, !vhlo.tensor_v1<!vhlo.i8_v1>) -> !vhlo.tensor_v1<!vhlo.i8_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.i8_v1>, !vhlo.tensor_v1<!vhlo.i8_v1>) -> !vhlo.tensor_v1<!vhlo.i8_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<i8>, tensor<i8>) -> tensor<i8>
- func.return %0 : tensor<i8>
- }
-
- // CHECK-LABEL: "type_i16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i16(%arg0: tensor<i16>, %arg1: tensor<i16>) -> tensor<i16> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.i16_v1>, !vhlo.tensor_v1<!vhlo.i16_v1>) -> !vhlo.tensor_v1<!vhlo.i16_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.i16_v1>, !vhlo.tensor_v1<!vhlo.i16_v1>) -> !vhlo.tensor_v1<!vhlo.i16_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<i16>, tensor<i16>) -> tensor<i16>
- func.return %0 : tensor<i16>
- }
-
- // CHECK-LABEL: "type_i32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i32(%arg0: tensor<i32>, %arg1: tensor<i32>) -> tensor<i32> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.i32_v1>, !vhlo.tensor_v1<!vhlo.i32_v1>) -> !vhlo.tensor_v1<!vhlo.i32_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.i32_v1>, !vhlo.tensor_v1<!vhlo.i32_v1>) -> !vhlo.tensor_v1<!vhlo.i32_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<i32>, tensor<i32>) -> tensor<i32>
- func.return %0 : tensor<i32>
- }
-
- // CHECK-LABEL: "type_i64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_i64(%arg0: tensor<i64>, %arg1: tensor<i64>) -> tensor<i64> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.i64_v1>, !vhlo.tensor_v1<!vhlo.i64_v1>) -> !vhlo.tensor_v1<!vhlo.i64_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.i64_v1>, !vhlo.tensor_v1<!vhlo.i64_v1>) -> !vhlo.tensor_v1<!vhlo.i64_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<i64>, tensor<i64>) -> tensor<i64>
- func.return %0 : tensor<i64>
- }
-
- // CHECK-LABEL: "type_ui4"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui4(%arg0: tensor<ui4>, %arg1: tensor<ui4>) -> tensor<ui4> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.ui4_v1>, !vhlo.tensor_v1<!vhlo.ui4_v1>) -> !vhlo.tensor_v1<!vhlo.ui4_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.ui4_v1>, !vhlo.tensor_v1<!vhlo.ui4_v1>) -> !vhlo.tensor_v1<!vhlo.ui4_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<ui4>, tensor<ui4>) -> tensor<ui4>
- func.return %0 : tensor<ui4>
- }
-
- // CHECK-LABEL: "type_ui8"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui8(%arg0: tensor<ui8>, %arg1: tensor<ui8>) -> tensor<ui8> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.ui8_v1>, !vhlo.tensor_v1<!vhlo.ui8_v1>) -> !vhlo.tensor_v1<!vhlo.ui8_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.ui8_v1>, !vhlo.tensor_v1<!vhlo.ui8_v1>) -> !vhlo.tensor_v1<!vhlo.ui8_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<ui8>, tensor<ui8>) -> tensor<ui8>
- func.return %0 : tensor<ui8>
- }
-
- // CHECK-LABEL: "type_ui16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui16(%arg0: tensor<ui16>, %arg1: tensor<ui16>) -> tensor<ui16> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.ui16_v1>, !vhlo.tensor_v1<!vhlo.ui16_v1>) -> !vhlo.tensor_v1<!vhlo.ui16_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.ui16_v1>, !vhlo.tensor_v1<!vhlo.ui16_v1>) -> !vhlo.tensor_v1<!vhlo.ui16_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<ui16>, tensor<ui16>) -> tensor<ui16>
- func.return %0 : tensor<ui16>
- }
-
- // CHECK-LABEL: "type_ui32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui32(%arg0: tensor<ui32>, %arg1: tensor<ui32>) -> tensor<ui32> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.ui32_v1>, !vhlo.tensor_v1<!vhlo.ui32_v1>) -> !vhlo.tensor_v1<!vhlo.ui32_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.ui32_v1>, !vhlo.tensor_v1<!vhlo.ui32_v1>) -> !vhlo.tensor_v1<!vhlo.ui32_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<ui32>, tensor<ui32>) -> tensor<ui32>
- func.return %0 : tensor<ui32>
- }
-
- // CHECK-LABEL: "type_ui64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui64(%arg0: tensor<ui64>, %arg1: tensor<ui64>) -> tensor<ui64> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.ui64_v1>, !vhlo.tensor_v1<!vhlo.ui64_v1>) -> !vhlo.tensor_v1<!vhlo.ui64_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.ui64_v1>, !vhlo.tensor_v1<!vhlo.ui64_v1>) -> !vhlo.tensor_v1<!vhlo.ui64_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<ui64>, tensor<ui64>) -> tensor<ui64>
- func.return %0 : tensor<ui64>
- }
-
- // CHECK-LABEL: "type_f8E4M3FN"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E4M3FN(%arg0: tensor<f8E4M3FN>, %arg1: tensor<f8E4M3FN>) -> tensor<f8E4M3FN> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f8E4M3FN_v1>, !vhlo.tensor_v1<!vhlo.f8E4M3FN_v1>) -> !vhlo.tensor_v1<!vhlo.f8E4M3FN_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f8E4M3FN_v1>, !vhlo.tensor_v1<!vhlo.f8E4M3FN_v1>) -> !vhlo.tensor_v1<!vhlo.f8E4M3FN_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f8E4M3FN>, tensor<f8E4M3FN>) -> tensor<f8E4M3FN>
- func.return %0 : tensor<f8E4M3FN>
- }
-
- // CHECK-LABEL: "type_f8E5M2"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E5M2(%arg0: tensor<f8E5M2>, %arg1: tensor<f8E5M2>) -> tensor<f8E5M2> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f8E5M2_v1>, !vhlo.tensor_v1<!vhlo.f8E5M2_v1>) -> !vhlo.tensor_v1<!vhlo.f8E5M2_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f8E5M2_v1>, !vhlo.tensor_v1<!vhlo.f8E5M2_v1>) -> !vhlo.tensor_v1<!vhlo.f8E5M2_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f8E5M2>, tensor<f8E5M2>) -> tensor<f8E5M2>
- func.return %0 : tensor<f8E5M2>
- }
-
- // CHECK-LABEL: "type_f8E4M3FNUZ"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E4M3FNUZ(%arg0: tensor<f8E4M3FNUZ>, %arg1: tensor<f8E4M3FNUZ>) -> tensor<f8E4M3FNUZ> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f8E4M3FNUZ_v1>, !vhlo.tensor_v1<!vhlo.f8E4M3FNUZ_v1>) -> !vhlo.tensor_v1<!vhlo.f8E4M3FNUZ_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f8E4M3FNUZ_v1>, !vhlo.tensor_v1<!vhlo.f8E4M3FNUZ_v1>) -> !vhlo.tensor_v1<!vhlo.f8E4M3FNUZ_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f8E4M3FNUZ>, tensor<f8E4M3FNUZ>) -> tensor<f8E4M3FNUZ>
- func.return %0 : tensor<f8E4M3FNUZ>
- }
-
- // CHECK-LABEL: "type_f8E4M3B11FNUZ"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E4M3B11FNUZ(%arg0: tensor<f8E4M3B11FNUZ>, %arg1: tensor<f8E4M3B11FNUZ>) -> tensor<f8E4M3B11FNUZ> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f8E4M3B11FNUZ_v1>, !vhlo.tensor_v1<!vhlo.f8E4M3B11FNUZ_v1>) -> !vhlo.tensor_v1<!vhlo.f8E4M3B11FNUZ_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f8E4M3B11FNUZ_v1>, !vhlo.tensor_v1<!vhlo.f8E4M3B11FNUZ_v1>) -> !vhlo.tensor_v1<!vhlo.f8E4M3B11FNUZ_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f8E4M3B11FNUZ>, tensor<f8E4M3B11FNUZ>) -> tensor<f8E4M3B11FNUZ>
- func.return %0 : tensor<f8E4M3B11FNUZ>
- }
-
- // CHECK-LABEL: "type_f8E5M2FNUZ"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E5M2FNUZ(%arg0: tensor<f8E5M2FNUZ>, %arg1: tensor<f8E5M2FNUZ>) -> tensor<f8E5M2FNUZ> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f8E5M2FNUZ_v1>, !vhlo.tensor_v1<!vhlo.f8E5M2FNUZ_v1>) -> !vhlo.tensor_v1<!vhlo.f8E5M2FNUZ_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f8E5M2FNUZ_v1>, !vhlo.tensor_v1<!vhlo.f8E5M2FNUZ_v1>) -> !vhlo.tensor_v1<!vhlo.f8E5M2FNUZ_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f8E5M2FNUZ>, tensor<f8E5M2FNUZ>) -> tensor<f8E5M2FNUZ>
- func.return %0 : tensor<f8E5M2FNUZ>
- }
-
- // CHECK-LABEL: "type_bf16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_bf16(%arg0: tensor<bf16>, %arg1: tensor<bf16>) -> tensor<bf16> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.bf16_v1>, !vhlo.tensor_v1<!vhlo.bf16_v1>) -> !vhlo.tensor_v1<!vhlo.bf16_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.bf16_v1>, !vhlo.tensor_v1<!vhlo.bf16_v1>) -> !vhlo.tensor_v1<!vhlo.bf16_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<bf16>, tensor<bf16>) -> tensor<bf16>
- func.return %0 : tensor<bf16>
- }
-
- // CHECK-LABEL: "type_f16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f16(%arg0: tensor<f16>, %arg1: tensor<f16>) -> tensor<f16> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f16_v1>, !vhlo.tensor_v1<!vhlo.f16_v1>) -> !vhlo.tensor_v1<!vhlo.f16_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f16_v1>, !vhlo.tensor_v1<!vhlo.f16_v1>) -> !vhlo.tensor_v1<!vhlo.f16_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f16>, tensor<f16>) -> tensor<f16>
- func.return %0 : tensor<f16>
- }
-
- // CHECK-LABEL: "type_f32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f32(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<f32> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f32_v1>, !vhlo.tensor_v1<!vhlo.f32_v1>) -> !vhlo.tensor_v1<!vhlo.f32_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f32_v1>, !vhlo.tensor_v1<!vhlo.f32_v1>) -> !vhlo.tensor_v1<!vhlo.f32_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
- func.return %0 : tensor<f32>
- }
-
- // CHECK-LABEL: "type_f64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f64(%arg0: tensor<f64>, %arg1: tensor<f64>) -> tensor<f64> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f64_v1>, !vhlo.tensor_v1<!vhlo.f64_v1>) -> !vhlo.tensor_v1<!vhlo.f64_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f64_v1>, !vhlo.tensor_v1<!vhlo.f64_v1>) -> !vhlo.tensor_v1<!vhlo.f64_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f64>, tensor<f64>) -> tensor<f64>
- func.return %0 : tensor<f64>
- }
-
- // CHECK-LABEL: "type_complex_f32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_complex_f32(%arg0: tensor<complex<f32>>, %arg1: tensor<complex<f32>>) -> tensor<complex<f32>> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f32_v1>>, !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f32_v1>>) -> !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f32_v1>>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f32_v1>>, !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f32_v1>>) -> !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f32_v1>>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<complex<f32>>, tensor<complex<f32>>) -> tensor<complex<f32>>
- func.return %0 : tensor<complex<f32>>
- }
-
- // CHECK-LABEL: "type_complex_f64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_complex_f64(%arg0: tensor<complex<f64>>, %arg1: tensor<complex<f64>>) -> tensor<complex<f64>> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f64_v1>>, !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f64_v1>>) -> !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f64_v1>>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f64_v1>>, !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f64_v1>>) -> !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f64_v1>>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<complex<f64>>, tensor<complex<f64>>) -> tensor<complex<f64>>
- func.return %0 : tensor<complex<f64>>
- }
-
- // CHECK-LABEL: "type_dynamism_ranked"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_dynamism_ranked(%arg0: tensor<?xf32>) -> tensor<?xf32> {
-- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1<?x!vhlo.f32_v1>) -> !vhlo.tensor_v1<?x!vhlo.f32_v1>
-+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1<?x!vhlo.f32_v1>) -> !vhlo.tensor_v1<?x!vhlo.f32_v1>
- %0 = "stablehlo.abs"(%arg0) : (tensor<?xf32>) -> tensor<?xf32>
- func.return %0 : tensor<?xf32>
- }
-
- // CHECK-LABEL: 
"type_quantization" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_quantization(%arg0: tensor>, %arg1: tensor>) -> tensor> { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor> - func.return %0 : tensor> - } - - // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>> - // CHECK-LABEL: "type_token_callee" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_token_callee(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.token_v1) -> () -+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> () - return %arg0 : !stablehlo.token - } - - // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>> - // CHECK-LABEL: "type_token_caller" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_token_caller(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.call_v1"(%arg0) <{callee = #vhlo.string_v1<"type_token_callee">} -+ // CHECK: "vhlo.call_v1"(%[[ARG0]]) <{callee = #vhlo.string_v1<"type_token_callee">} - // CHECK-SAME: (!vhlo.token_v1) -> !vhlo.token_v1 - %0 = func.call @type_token_callee(%arg0) : (!stablehlo.token) -> !stablehlo.token - return %0 : !stablehlo.token - } - - // CHECK-LABEL: "type_tuple" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_tuple(%arg0: tuple>) -> tuple { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo" -diff --ruN a/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_16_0.mlir b/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_16_0.mlir ---- stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_16_0.mlir -+++ stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_16_0.mlir -@@ -13,6 +13,7 @@ - // ============ ATTRIBUTES ============ - - // CHECK-LABEL: "attr_comparison_direction_eq" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_eq(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -22,6 +23,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_ne" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_ne(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -31,6 +33,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_ge" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_ge(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -40,6 +43,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_gt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_gt(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -49,6 +53,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_le" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_le(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -58,6 +63,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_lt" -+// CHECK-NEXT: 
(%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_lt(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -67,6 +73,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_notype" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_notype(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo -@@ -76,6 +83,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_float" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_float(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -86,6 +94,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_totalorder" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_totalorder(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -96,6 +105,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_signed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_signed(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -106,6 +116,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_unsigned" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_unsigned(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -118,6 +129,7 @@ - // ConvDimensionNumbers aka #stablehlo.conv is covered below. - - // CHECK-LABEL: "attr_custom_call_api_version_unspecified" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_unspecified(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -128,6 +140,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_original" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_original(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -138,6 +151,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_status_returning" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_status_returning(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -148,6 +162,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_status_returning_unified" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_status_returning_unified(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -166,6 +181,7 @@ - // DotDimensionNumbers aka #stablehlo.dot is covered below. 
- - // CHECK-LABEL: "attr_fft_type_fft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -176,6 +192,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_ifft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_ifft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -186,6 +203,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_rfft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_rfft(%arg0: tensor<16xf32>) -> tensor<9xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -196,6 +214,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_irfft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_irfft(%arg0: tensor<9xcomplex>) -> tensor<16xf32> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -208,6 +227,7 @@ - // GatherDimensionNumbers aka #stablehlo.gather is covered below. - - // CHECK-LABEL: "attr_precision_config_default" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_default(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -216,6 +236,7 @@ - } - - // CHECK-LABEL: "attr_precision_config_high" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_high(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -225,6 +246,7 @@ - } - - // CHECK-LABEL: "attr_precision_config_highest" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_highest(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -234,6 +256,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_default" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_default(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -243,6 +266,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_three_fry" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_three_fry(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -252,6 +276,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_philox" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_philox(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -261,6 +286,7 @@ - } - - // CHECK-LABEL: "attr_rng_distribution_uniform" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @attr_rng_distribution_uniform(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { - %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) { - // CHECK: rng_distribution = #vhlo -@@ -270,6 +296,7 @@ - } - - // CHECK-LABEL: "attr_rng_distribution_normal" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @attr_rng_distribution_normal(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { - %0 = 
"stablehlo.rng"(%arg0, %arg1, %arg2) { - // CHECK: rng_distribution = #vhlo -@@ -281,6 +308,7 @@ - // ScatterDimensionNumbers aka #stablehlo.scatter is covered below. - - // CHECK-LABEL: "attr_transpose_no_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_no_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -293,6 +321,7 @@ - } - - // CHECK-LABEL: "attr_transpose_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -305,6 +334,7 @@ - } - - // CHECK-LABEL: "attr_transpose_adjoint" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_adjoint(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -319,10 +349,9 @@ - // TypeExtensionsAttr aka #stablehlo.type_extensions is covered below. - - // CHECK-LABEL: "attr_type_extensions_bounds" --func.func @attr_type_extensions_bounds( -- %arg0: tensor>) -- -> tensor> { -- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1>) -> () -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) -+func.func @attr_type_extensions_bounds(%arg0: tensor>) -> tensor> { -+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> () - func.return %arg0 : tensor> - } - -@@ -330,8 +359,9 @@ - // ============ DEFAULTS ============ - - // CHECK-LABEL: "default_all_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.all_gather_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{ - // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64> - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -345,8 +375,9 @@ - } - - // CHECK-LABEL: "default_all_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_reduce(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.all_reduce_v1"(%arg0) -+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]]) - // CHECK-SAME: <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -368,8 +399,9 @@ - } - - // CHECK-LABEL: "default_all_to_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> { -- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>, -@@ -386,8 +418,9 @@ - } - - // CHECK-LABEL: "default_cholesky" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> { -- // CHECK: "vhlo.cholesky_v1"(%arg0) <{ -+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{ - // CHECK-SAME: lower = #vhlo.bool_v1 - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1> - %0 = "stablehlo.cholesky"(%arg0) : (tensor<1x16x16xf32>) -> tensor<1x16x16xf32> -@@ -395,8 +428,9 @@ - } - - // CHECK-LABEL: 
"default_collective_permute" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: "vhlo.collective_permute_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -407,8 +441,9 @@ - } - - // CHECK-LABEL: "default_collective_broadcast" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_collective_broadcast(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: "vhlo.collective_broadcast_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_broadcast_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -419,8 +454,9 @@ - } - - // CHECK-LABEL: "default_compare" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_compare(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: compare_type = #vhlo, - // CHECK-SAME: comparison_direction = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -431,8 +467,9 @@ - } - - // CHECK-LABEL: "default_convolution" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x6x6x16xf32> { -- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -460,8 +497,9 @@ - } - - // CHECK-LABEL: "default_custom_call" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_custom_call(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.custom_call_v1"(%arg0) <{ -+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{ - // CHECK-SAME: api_version = #vhlo, - // CHECK-SAME: backend_config = #vhlo.string_v1<"">, - // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">, -@@ -478,8 +516,9 @@ - } - - // CHECK-LABEL: "default_dot_general" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> { -- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, -@@ -498,8 +537,9 @@ - } - - // CHECK-LABEL: "default_dot" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> 
!vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.dot"(%arg0, %arg1) : (tensor<8x16xf32>, tensor<16x8xf32>) -> tensor<8x8xf32> -@@ -507,8 +547,9 @@ - } - - // CHECK-LABEL: "default_dynamic_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>>, - // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>> -@@ -520,8 +561,9 @@ - } - - // CHECK-LABEL: "default_dynamic_conv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> { -- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -549,8 +591,9 @@ - } - - // CHECK-LABEL: "default_dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> { -- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -576,15 +619,16 @@ - // CHECK-SAME: sym_name = #vhlo.string_v1<"default_func">, - // CHECK-SAME: sym_visibility = #vhlo.string_v1<""> - // CHECK-SAME: }> ({ -- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1): -- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1): -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : () -> () - func.return %arg0 : tensor - } - - // CHECK-LABEL: "dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> { -- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -605,8 +649,9 @@ - } - - // CHECK-LABEL: "default_infeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.infeed_v1"(%arg0) <{ -+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{ - // CHECK-SAME: infeed_config = #vhlo.string_v1<"">, - // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[]> - // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) -@@ -615,8 +660,9 @@ - } - - // CHECK-LABEL: "default_outfeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func 
@default_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: outfeed_config = #vhlo.string_v1<""> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.outfeed"(%arg0, %arg1) : (tensor, !stablehlo.token) -> !stablehlo.token -@@ -624,8 +670,9 @@ - } - - // CHECK-LABEL: "default_recv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.recv_v1"(%arg0) <{ -+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -637,8 +684,9 @@ - } - - // CHECK-LABEL: "default_send" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -650,8 +698,9 @@ - } - - // CHECK-LABEL: "default_reduce_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64> -@@ -673,8 +722,9 @@ - } - - // CHECK-LABEL: "default_reduce_window" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x16x30x7xf32> { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, -@@ -696,8 +746,9 @@ - } - - // CHECK-LABEL: "default_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> { -- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, - // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>, -@@ -725,8 +776,9 @@ - } - - // CHECK-LABEL: "default_select_and_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<10x23x23x64xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> { -- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>, - // 
CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>> -@@ -754,8 +806,9 @@ - } - - // CHECK-LABEL: "default_sort" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.sort_v1"(%arg0) <{ -+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<-1 : i64> - // CHECK-SAME: is_stable = #vhlo.bool_v1 - // CHECK-SAME: }> ({ -@@ -774,29 +827,33 @@ - // ============ OPS ============ - - // CHECK-LABEL: "op_abs" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_abs(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_add" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_add(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_after_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_after_all(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.after_all_v1"(%arg0) : (!vhlo.token_v1) -> !vhlo.token_v1 -+ // CHECK: "vhlo.after_all_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.after_all"(%arg0) : (!stablehlo.token) -> !stablehlo.token - func.return %0 : !stablehlo.token - } - - // CHECK-LABEL: "op_all_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.all_gather_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{ - // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64> - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -812,8 +869,9 @@ - } - - // CHECK-LABEL: "op_all_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_reduce(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.all_reduce_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: use_global_device_ids = #vhlo.bool_v1 -@@ -835,8 +893,9 @@ - } - - // CHECK-LABEL: "op_all_to_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> { -- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>, -@@ -854,22 +913,25 @@ - } - - // CHECK-LABEL: "op_and" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_and(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: 
"op_atan2" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_atan2(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.atan2_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.atan2_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.atan2"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_batch_norm_grad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_batch_norm_grad(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16x16x16x16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { -- // CHECK: "vhlo.batch_norm_grad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ -+ // CHECK: "vhlo.batch_norm_grad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -@@ -881,8 +943,9 @@ - } - - // CHECK-LABEL: "op_batch_norm_inference" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_batch_norm_inference(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16xf32>) -> tensor<16x16x16x16xf32> { -- // CHECK: "vhlo.batch_norm_inference_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ -+ // CHECK: "vhlo.batch_norm_inference_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1> -@@ -894,8 +957,9 @@ - } - - // CHECK-LABEL: "op_batch_norm_training" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_batch_norm_training(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { -- // CHECK: "vhlo.batch_norm_training_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.batch_norm_training_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -@@ -907,15 +971,17 @@ - } - - // CHECK-LABEL: "op_bitcast_convert" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_bitcast_convert(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.bitcast_convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: 
"vhlo.bitcast_convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.bitcast_convert"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_broadcast_in_dim(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.broadcast_in_dim_v1"(%arg0) <{ -+ // CHECK: "vhlo.broadcast_in_dim_v1"(%[[ARG0]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1> - %0 = "stablehlo.broadcast_in_dim"(%arg0) { -@@ -925,8 +991,9 @@ - } - - // CHECK-LABEL: "op_broadcast" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_broadcast(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.broadcast_v1"(%arg0) <{ -+ // CHECK: "vhlo.broadcast_v1"(%[[ARG0]]) <{ - // CHECK-SAME: broadcast_sizes = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1> - %0 = "stablehlo.broadcast"(%arg0) { -@@ -936,9 +1003,10 @@ - } - - // CHECK-LABEL: "op_case" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_case(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.case_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.case"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -947,22 +1015,25 @@ - } - - // CHECK-LABEL: "op_cbrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cbrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cbrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.cbrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cbrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_ceil" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_ceil(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.ceil_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.ceil_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.ceil"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_cholesky" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> { -- // CHECK: "vhlo.cholesky_v1"(%arg0) <{ -+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{ - // CHECK-SAME: lower = #vhlo.bool_v1 - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1> - %0 = "stablehlo.cholesky"(%arg0) { -@@ -972,22 +1043,25 @@ - } - - // CHECK-LABEL: "op_clamp" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_clamp(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.clamp_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.clamp_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.clamp"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_count_leading_zeros" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_count_leading_zeros(%arg0: tensor) -> tensor { -- // 
CHECK: "vhlo.count_leading_zeros_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.count_leading_zeros_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.count_leading_zeros"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_collective_permute" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: "vhlo.collective_permute_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -999,8 +1073,9 @@ - } - - // CHECK-LABEL: "op_compare" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_compare(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: compare_type = #vhlo, - // CHECK-SAME: comparison_direction = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -1012,15 +1087,17 @@ - } - - // CHECK-LABEL: "op_complex" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_complex(%arg0: tensor, %arg1: tensor) -> tensor> { -- // CHECK: "vhlo.complex_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.complex_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1> - %0 = "stablehlo.complex"(%arg0, %arg1) : (tensor, tensor) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "op_concatenate" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_concatenate(%arg0: tensor<8xf32>, %arg1: tensor<8xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.concatenate_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.concatenate_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x!vhlo.f32_v1>, !vhlo.tensor_v1<8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.concatenate"(%arg0, %arg1) { -@@ -1030,6 +1107,7 @@ - } - - // CHECK-LABEL: "op_constant" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_constant(%arg0: tensor) -> tensor { - // CHECK: "vhlo.constant_v1"() <{ - // CHECK-SAME: value = #vhlo.tensor_v1 : tensor> -@@ -1041,15 +1119,17 @@ - } - - // CHECK-LABEL: "op_convert" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_convert(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.convert"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_convolution" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x7x7x16xf32> { -- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -1083,8 +1163,9 @@ - } - - // CHECK-LABEL: "op_cosine" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func 
@op_cosine(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cosine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.cosine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cosine"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } -@@ -1097,8 +1178,9 @@ - } - - // CHECK-LABEL: "op_cross_replica_sum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cross_replica_sum(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cross-replica-sum_v1"(%arg0) <{ -+ // CHECK: "vhlo.cross-replica-sum_v1"(%[[ARG0]]) <{ - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cross-replica-sum"(%arg0) { -@@ -1108,8 +1190,9 @@ - } - - // CHECK-LABEL: "op_custom_call" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_custom_call(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.custom_call_v1"(%arg0) <{ -+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{ - // CHECK-SAME: api_version = #vhlo, - // CHECK-SAME: backend_config = #vhlo.string_v1<"\08\03\1A\02">, - // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">, -@@ -1140,15 +1223,17 @@ - } - - // CHECK-LABEL: "op_divide" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_divide(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.divide_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.divide_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.divide"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dot_general" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> { -- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, -@@ -1168,8 +1253,9 @@ - } - - // CHECK-LABEL: "op_dot" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.dot"(%arg0, %arg1) { -@@ -1179,8 +1265,9 @@ - } - - // CHECK-LABEL: "op_dynamic_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1194,8 +1281,9 @@ - } - - // CHECK-LABEL: "op_dynamic_conv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: 
tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> { -- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -1229,8 +1317,9 @@ - } - - // CHECK-LABEL: "op_dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> { -- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -1250,8 +1339,9 @@ - } - - // CHECK-LABEL: "op_dynamic_iota" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_dynamic_iota(%arg0: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_iota_v1"(%arg0) <{ -+ // CHECK: "vhlo.dynamic_iota_v1"(%[[ARG0]]) <{ - // CHECK-SAME: iota_dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_iota"(%arg0) { -@@ -1261,22 +1351,25 @@ - } - - // CHECK-LABEL: "op_dynamic_pad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_dynamic_pad(%arg0: tensor, %arg1: tensor, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>, %arg4: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_pad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.dynamic_pad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_pad"(%arg0, %arg1, %arg2, %arg3, %arg4) : (tensor, tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dynamic_reshape" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_reshape(%arg0: tensor<16xf32>, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_reshape_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.dynamic_reshape_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_reshape"(%arg0, %arg1) : (tensor<16xf32>, tensor<2xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dynamic_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_slice(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor<4xf32> { -- // CHECK: "vhlo.dynamic_slice_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_slice_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: slice_sizes = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<4x!vhlo.f32_v1> - %0 = 
"stablehlo.dynamic_slice"(%arg0, %arg1) { -@@ -1286,15 +1379,17 @@ - } - - // CHECK-LABEL: "op_dynamic_update_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_update_slice(%arg0: tensor<16xf32>, %arg1: tensor<4xf32>, %arg2: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.dynamic_update_slice_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> -+ // CHECK: "vhlo.dynamic_update_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.dynamic_update_slice"(%arg0, %arg1, %arg2) : (tensor<16xf32>, tensor<4xf32>, tensor) -> tensor<16xf32> - func.return %0 : tensor<16xf32> - } - - // CHECK-LABEL: "op_einsum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_einsum(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.einsum_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.einsum_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab,bc->ac"> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.einsum"(%arg0, %arg1) { -@@ -1304,22 +1399,25 @@ - } - - // CHECK-LABEL: "op_exponential_minus_one" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_exponential_minus_one(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.exponential_minus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.exponential_minus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.exponential_minus_one"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_exponential" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_exponential(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.exponential_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.exponential_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.exponential"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_fft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { -- // CHECK: "vhlo.fft_v1"(%arg0) <{ -+ // CHECK: "vhlo.fft_v1"(%[[ARG0]]) <{ - // CHECK-SAME: fft_length = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: fft_type = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.complex_v1>) -> !vhlo.tensor_v1<16x!vhlo.complex_v1> -@@ -1331,8 +1429,9 @@ - } - - // CHECK-LABEL: "op_floor" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_floor(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.floor_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.floor_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.floor"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } -@@ -1345,16 +1444,17 @@ - // CHECK-SAME: sym_name = #vhlo.string_v1<"op_func">, - // CHECK-SAME: sym_visibility = #vhlo.string_v1<"private"> - // CHECK-SAME: }> ({ -- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1): -- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1): -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : () -> () - - func.return %arg0 : tensor - } - - // CHECK-LABEL: 
"op_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> { -- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -1376,8 +1476,9 @@ - } - - // CHECK-LABEL: "op_get_dimension_size" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_get_dimension_size(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.get_dimension_size_v1"(%arg0) <{ -+ // CHECK: "vhlo.get_dimension_size_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.get_dimension_size"(%arg0) { -@@ -1387,8 +1488,9 @@ - } - - // CHECK-LABEL: "op_get_tuple_element" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_get_tuple_element(%arg0: tuple, tensor>) -> tensor { -- // CHECK: "vhlo.get_tuple_element_v1"(%arg0) <{ -+ // CHECK: "vhlo.get_tuple_element_v1"(%[[ARG0]]) <{ - // CHECK-SAME: index = #vhlo.integer_v1<0 : i32> - // CHECK-SAME: }> : (!vhlo.tuple_v1, !vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.get_tuple_element"(%arg0) { -@@ -1398,11 +1500,12 @@ - } - - // CHECK-LABEL: "op_if" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_if(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.if_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.if_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }, { -- // CHECK-NEXT: "vhlo.return_v1"(%arg2) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG2]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.if"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -1413,15 +1516,17 @@ - } - - // CHECK-LABEL: "op_imag" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_imag(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.imag_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.imag_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.imag"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_infeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.infeed_v1"(%arg0) <{ -+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{ - // CHECK-SAME: infeed_config = #vhlo.string_v1<"foo">, - // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[#vhlo.array_v1<[]>]> - // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) -@@ -1444,36 +1549,41 @@ - } - - // CHECK-LABEL: "op_is_finite" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_is_finite(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.is_finite_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.is_finite_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.is_finite"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_log" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_log(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.log_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: 
"vhlo.log_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.log"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_log_plus_one" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_log_plus_one(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.log_plus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.log_plus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.log_plus_one"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_logistic" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_logistic(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.logistic_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.logistic_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.logistic"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_map" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_map(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.map_v1"(%arg0) <{ -+ // CHECK: "vhlo.map_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> ({ - // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1): -@@ -1491,57 +1601,65 @@ - } - - // CHECK-LABEL: "op_maximum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_maximum(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.maximum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.maximum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.maximum"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_minimum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_minimum(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.minimum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.minimum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.minimum"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_multiply" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_multiply(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.multiply_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.multiply_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.multiply"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_negate" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_negate(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.negate_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.negate_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.negate"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_not" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_not(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.not_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.not_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.not"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_optimization_barrier" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func 
@op_optimization_barrier(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.optimization_barrier_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.optimization_barrier_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.optimization_barrier"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_or" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_or(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.or_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.or_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.or"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_outfeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: outfeed_config = #vhlo.string_v1<"foo"> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.outfeed"(%arg0, %arg1) { -@@ -1551,8 +1669,9 @@ - } - - // CHECK-LABEL: "op_pad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_pad(%arg0: tensor<8xf32>, %arg1: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.pad_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.pad_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: edge_padding_high = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: edge_padding_low = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: interior_padding = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1566,36 +1685,41 @@ - } - - // CHECK-LABEL: "op_popcnt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_popcnt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.popcnt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.popcnt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.popcnt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_power" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_power(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.power_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.power_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.power"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_real_dynamic_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}) - func.func @op_real_dynamic_slice(%arg0: tensor, %arg1: tensor<1xindex>, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.real_dynamic_slice_v1"(%arg0, %arg1, %arg2, %arg3) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.real_dynamic_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.real_dynamic_slice"(%arg0, %arg1, %arg2, %arg3) : (tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_real" -+// CHECK-NEXT: (%[[ARG0:.*]]: 
{{.*}}) - func.func @op_real(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.real_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.real_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.real"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_recv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.recv_v1"(%arg0) <{ -+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<3 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -1608,8 +1732,9 @@ - } - - // CHECK-LABEL: "op_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_reduce(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.reduce_v1"(%arg0, %arg1) -+ // CHECK: "vhlo.reduce_v1"(%[[ARG0]], %[[ARG1]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -1624,8 +1749,9 @@ - } - - // CHECK-LABEL: "op_reduce_precision" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reduce_precision(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.reduce_precision_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_precision_v1"(%[[ARG0]]) <{ - // CHECK-SAME: exponent_bits = #vhlo.integer_v1<8 : i32> - // CHECK-SAME: mantissa_bits = #vhlo.integer_v1<10 : i32> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -1637,8 +1763,9 @@ - } - - // CHECK-LABEL: "op_reduce_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64> -@@ -1662,8 +1789,9 @@ - } - - // CHECK-LABEL: "op_reduce_window" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x9x16x7xf32> { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, -@@ -1689,8 +1817,9 @@ - } - - // CHECK-LABEL: "op_remainder" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_remainder(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.remainder_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.remainder_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.remainder"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } -@@ -1710,16 +1839,18 @@ - } - - // CHECK-LABEL: "op_reshape" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reshape(%arg0: tensor<16xf32>) -> tensor<4x4xf32> { -- // CHECK: "vhlo.reshape_v1"(%arg0) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1> -+ // CHECK: "vhlo.reshape_v1"(%[[ARG0]]) : 
(!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1> - %0 = "stablehlo.reshape"(%arg0) : (tensor<16xf32>) -> tensor<4x4xf32> - func.return %0 : tensor<4x4xf32> - } - - // CHECK-LABEL: "op_return" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_return(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.case_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.case"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -1728,8 +1859,9 @@ - } - - // CHECK-LABEL: "op_reverse" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reverse(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reverse_v1"(%arg0) <{ -+ // CHECK: "vhlo.reverse_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.reverse"(%arg0) { -@@ -1739,8 +1871,9 @@ - } - - // CHECK-LABEL: "op_rng_bit_generator" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_rng_bit_generator(%arg0: tensor) -> (tensor, tensor) { -- // CHECK: "vhlo.rng_bit_generator_v1"(%arg0) <{ -+ // CHECK: "vhlo.rng_bit_generator_v1"(%[[ARG0]]) <{ - // CHECK-SAME: rng_algorithm = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> (!vhlo.tensor_v1, !vhlo.tensor_v1) - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { -@@ -1750,8 +1883,9 @@ - } - - // CHECK-LABEL: "op_rng" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_rng(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { -- // CHECK: "vhlo.rng_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.rng_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: rng_distribution = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<0x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) { -@@ -1761,29 +1895,33 @@ - } - - // CHECK-LABEL: "op_round_nearest_afz" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_round_nearest_afz(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.round_nearest_afz_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.round_nearest_afz_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.round_nearest_afz"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_round_nearest_even" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_round_nearest_even(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.round_nearest_even_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.round_nearest_even_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.round_nearest_even"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_rsqrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_rsqrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.rsqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.rsqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.rsqrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_scatter(%arg0: tensor<200x100x300xf32>, %arg1: 
tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> { -- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, - // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>, -@@ -1813,8 +1951,9 @@ - } - - // CHECK-LABEL: "op_select_and_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<12x13x13x66xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> { -- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>> -@@ -1844,15 +1983,17 @@ - } - - // CHECK-LABEL: "op_select" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_select(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.select_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.select_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.select"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_send" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -1865,8 +2006,9 @@ - } - - // CHECK-LABEL: "op_set_dimension_size" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_set_dimension_size(%arg0: tensor, %arg1: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.set_dimension_size_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.set_dimension_size_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.set_dimension_size"(%arg0, %arg1) { -@@ -1876,43 +2018,49 @@ - } - - // CHECK-LABEL: "op_shift_left" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_left(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.shift_left_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_left_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_left"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_shift_right_arithmetic" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_right_arithmetic(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.shift_right_arithmetic_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_right_arithmetic_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> 
!vhlo.tensor_v1 - %0 = "stablehlo.shift_right_arithmetic"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_shift_right_logical" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_right_logical(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.shift_right_logical_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_right_logical_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_right_logical"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_sign" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sign(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.sign_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.sign_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.sign"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_sine" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sine(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.sine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.sine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.sine"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_slice(%arg0: tensor<16xf32>) -> tensor<4xf32> { -- // CHECK: "vhlo.slice_v1"(%arg0) <{ -+ // CHECK: "vhlo.slice_v1"(%[[ARG0]]) <{ - // CHECK-SAME: limit_indices = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: start_indices = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: strides = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1926,8 +2074,9 @@ - } - - // CHECK-LABEL: "op_sort" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.sort_v1"(%arg0) <{ -+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: is_stable = #vhlo.bool_v1 - // CHECK-SAME: }> ({ -@@ -1947,29 +2096,33 @@ - } - - // CHECK-LABEL: "op_sqrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sqrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.sqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.sqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.sqrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_subtract" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_subtract(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.subtract_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.subtract_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.subtract"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_tanh" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_tanh(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.tanh_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.tanh_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.tanh"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_torch_index_select" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_torch_index_select(%arg0: tensor<5x1x5xf32>, %arg1: tensor<2xi32>) -> 
tensor<2x1x5xf32> { -- // CHECK: "vhlo.torch_index_select_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.torch_index_select_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_dims = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: dim = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<5x1x5x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.i32_v1>) -> !vhlo.tensor_v1<2x1x5x!vhlo.f32_v1> -@@ -1981,8 +2134,9 @@ - } - - // CHECK-LABEL: "op_trace" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_trace(%arg0: tensor) { -- // CHECK: "vhlo.trace_v1"(%arg0) <{ -+ // CHECK: "vhlo.trace_v1"(%[[ARG0]]) <{ - // CHECK-SAME: tag = #vhlo.string_v1<"foo"> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> () - "stablehlo.trace"(%arg0) { -@@ -1992,8 +2146,9 @@ - } - - // CHECK-LABEL: "op_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_transpose(%arg0: tensor<16x8xf32>) -> tensor<8x16xf32> { -- // CHECK: "vhlo.transpose_v1"(%arg0) <{ -+ // CHECK: "vhlo.transpose_v1"(%[[ARG0]]) <{ - // CHECK-SAME: permutation = #vhlo.tensor_v1 : tensor<2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x16x!vhlo.f32_v1> - %0 = "stablehlo.transpose"(%arg0) { -@@ -2003,8 +2158,9 @@ - } - - // CHECK-LABEL: "op_triangular_solve" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_triangular_solve(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.triangular_solve_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.triangular_solve_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: left_side = #vhlo.bool_v1, - // CHECK-SAME: lower = #vhlo.bool_v1, - // CHECK-SAME: transpose_a = #vhlo, -@@ -2020,15 +2176,17 @@ - } - - // CHECK-LABEL: "op_tuple" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_tuple(%arg0: tensor) -> tuple> { -- // CHECK: "vhlo.tuple_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1> -+ // CHECK: "vhlo.tuple_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1> - %0 = "stablehlo.tuple"(%arg0) : (tensor) -> tuple> - func.return %0 : tuple> - } - - // CHECK-LABEL: "op_unary_einsum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_unary_einsum(%arg0: tensor<8x16xf32>) -> tensor<8xf32> { -- // CHECK: "vhlo.unary_einsum_v1"(%arg0) <{ -+ // CHECK: "vhlo.unary_einsum_v1"(%[[ARG0]]) <{ - // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab->a"> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x!vhlo.f32_v1> - %0 = "stablehlo.unary_einsum"(%arg0) { -@@ -2038,22 +2196,25 @@ - } - - // CHECK-LABEL: "op_uniform_dequantize" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_uniform_dequantize(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.uniform_dequantize_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.uniform_dequantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.uniform_dequantize"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_uniform_quantize" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_uniform_quantize(%arg0: tensor) -> tensor> { -- // CHECK: "vhlo.uniform_quantize_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.uniform_quantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1> - %0 = "stablehlo.uniform_quantize"(%arg0) : (tensor) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "op_while" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_while(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.while_v1"(%arg0) ({ -+ // CHECK: 
"vhlo.while_v1"(%[[ARG0]]) ({ - // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1): - // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }, { -@@ -2071,8 +2232,9 @@ - } - - // CHECK-LABEL: "op_xor" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_xor(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.xor_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.xor_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.xor"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } -@@ -2080,190 +2242,217 @@ - // ============ TYPES ============ - - // CHECK-LABEL: "type_i1" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i1(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i4" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i4(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i8" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i8(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i32(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i64(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui4" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - 
func.func @type_ui4(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui8" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui8(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui32(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui64(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E4M3FN" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E4M3FN(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E5M2" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E5M2(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E4M3FNUZ" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E4M3FNUZ(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - 
func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f8E4M3B11FNUZ"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E4M3B11FNUZ(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f8E5M2FNUZ"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E5M2FNUZ(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_bf16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_bf16(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f16(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f32(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_f64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f64(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_complex_f32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_complex_f32(%arg0: tensor>, %arg1: tensor>) -> tensor> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor>
- func.return %0 : tensor>
- }
-
- // CHECK-LABEL: "type_complex_f64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_complex_f64(%arg0: tensor>, %arg1: tensor>) -> tensor> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor>
- func.return %0 : tensor>
- }
-
- // CHECK-LABEL: "type_dynamism_ranked"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_dynamism_ranked(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "type_quantization"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_quantization(%arg0: tensor>, %arg1: tensor>) -> tensor> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor>
- func.return %0 : tensor>
- }
-
- // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>>
- // CHECK-LABEL: "type_token_callee"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_token_callee(%arg0: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.token_v1) -> ()
-+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> ()
- return %arg0 : !stablehlo.token
- }
-
- // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>>
- // CHECK-LABEL: "type_token_caller"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_token_caller(%arg0: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.call_v1"(%arg0) <{callee = #vhlo.string_v1<"type_token_callee">}
-+ // CHECK: "vhlo.call_v1"(%[[ARG0]]) <{callee = #vhlo.string_v1<"type_token_callee">}
- // CHECK-SAME: (!vhlo.token_v1) -> !vhlo.token_v1
- %0 = func.call @type_token_callee(%arg0) : (!stablehlo.token) -> !stablehlo.token
- return %0 : !stablehlo.token
- }
-
- // CHECK-LABEL: "type_tuple"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @type_tuple(%arg0: tuple>) -> tuple {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo"
-diff --ruN a/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_17_0.mlir b/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_17_0.mlir
---- stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_17_0.mlir
-+++ stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_17_0.mlir
-@@ -13,6 +13,7 @@
- // ============ ATTRIBUTES ============
-
- // CHECK-LABEL: "attr_comparison_direction_eq"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_eq(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -22,6 +23,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_ne"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_ne(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -31,6 +33,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_ge"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_ge(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -40,6 +43,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_gt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_gt(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -49,6 +53,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_le"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_le(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -58,6 +63,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_lt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_lt(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -67,6 +73,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_notype"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_notype(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo
-@@ -76,6 +83,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_float"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_float(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo,
-@@ -86,6 +94,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_totalorder"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_totalorder(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo,
-@@ -96,6 +105,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_signed"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_signed(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo,
-@@ -106,6 +116,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_unsigned"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_unsigned(%arg0: tensor, %arg1: tensor) -> tensor {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo,
-@@ -118,6 +129,7 @@
- // ConvDimensionNumbers aka #stablehlo.conv is covered below.
-
- // CHECK-LABEL: "attr_custom_call_api_version_unspecified"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_custom_call_api_version_unspecified(%arg0: tensor) -> tensor {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo",
-@@ -128,6 +140,7 @@
- }
-
- // CHECK-LABEL: "attr_custom_call_api_version_original"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_custom_call_api_version_original(%arg0: tensor) -> tensor {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo",
-@@ -138,6 +151,7 @@
- }
-
- // CHECK-LABEL: "attr_custom_call_api_version_status_returning"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_custom_call_api_version_status_returning(%arg0: tensor) -> tensor {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo",
-@@ -148,6 +162,7 @@
- }
-
- // CHECK-LABEL: "attr_custom_call_api_version_status_returning_unified"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_custom_call_api_version_status_returning_unified(%arg0: tensor) -> tensor {
- %0 = "stablehlo.custom_call"(%arg0) {
- call_target_name = "foo",
-@@ -166,6 +181,7 @@
- // DotDimensionNumbers aka #stablehlo.dot is covered below.
-
- // CHECK-LABEL: "attr_fft_type_fft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -176,6 +192,7 @@
- }
-
- // CHECK-LABEL: "attr_fft_type_ifft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_ifft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -186,6 +203,7 @@
- }
-
- // CHECK-LABEL: "attr_fft_type_rfft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_rfft(%arg0: tensor<16xf32>) -> tensor<9xcomplex> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -196,6 +214,7 @@
- }
-
- // CHECK-LABEL: "attr_fft_type_irfft"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @attr_fft_type_irfft(%arg0: tensor<9xcomplex>) -> tensor<16xf32> {
- %0 = "stablehlo.fft"(%arg0) {
- // CHECK: fft_type = #vhlo
-@@ -208,6 +227,7 @@
- // GatherDimensionNumbers aka #stablehlo.gather is covered below.
- - // CHECK-LABEL: "attr_precision_config_default" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_default(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -216,6 +236,7 @@ - } - - // CHECK-LABEL: "attr_precision_config_high" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_high(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -225,6 +246,7 @@ - } - - // CHECK-LABEL: "attr_precision_config_highest" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_highest(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -234,6 +256,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_default" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_default(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -243,6 +266,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_three_fry" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_three_fry(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -252,6 +276,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_philox" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_philox(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -261,6 +286,7 @@ - } - - // CHECK-LABEL: "attr_rng_distribution_uniform" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @attr_rng_distribution_uniform(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { - %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) { - // CHECK: rng_distribution = #vhlo -@@ -270,6 +296,7 @@ - } - - // CHECK-LABEL: "attr_rng_distribution_normal" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @attr_rng_distribution_normal(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { - %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) { - // CHECK: rng_distribution = #vhlo -@@ -281,6 +308,7 @@ - // ScatterDimensionNumbers aka #stablehlo.scatter is covered below. 
- - // CHECK-LABEL: "attr_transpose_no_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_no_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -293,6 +321,7 @@ - } - - // CHECK-LABEL: "attr_transpose_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -305,6 +334,7 @@ - } - - // CHECK-LABEL: "attr_transpose_adjoint" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_adjoint(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -319,10 +349,9 @@ - // TypeExtensionsAttr aka #stablehlo.type_extensions is covered below. - - // CHECK-LABEL: "attr_type_extensions_bounds" --func.func @attr_type_extensions_bounds( -- %arg0: tensor>) -- -> tensor> { -- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1>) -> () -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) -+func.func @attr_type_extensions_bounds(%arg0: tensor>) -> tensor> { -+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> () - func.return %arg0 : tensor> - } - -@@ -330,8 +359,9 @@ - // ============ DEFAULTS ============ - - // CHECK-LABEL: "default_all_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.all_gather_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{ - // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64> - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -345,8 +375,9 @@ - } - - // CHECK-LABEL: "default_all_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_reduce(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.all_reduce_v1"(%arg0) -+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]]) - // CHECK-SAME: <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -368,8 +399,9 @@ - } - - // CHECK-LABEL: "default_all_to_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> { -- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>, -@@ -386,8 +418,9 @@ - } - - // CHECK-LABEL: "default_cholesky" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> { -- // CHECK: "vhlo.cholesky_v1"(%arg0) <{ -+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{ - // CHECK-SAME: lower = #vhlo.bool_v1 - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1> - %0 = "stablehlo.cholesky"(%arg0) : (tensor<1x16x16xf32>) -> tensor<1x16x16xf32> -@@ -395,8 +428,9 @@ - } - - // CHECK-LABEL: "default_collective_permute" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: 
"vhlo.collective_permute_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -407,8 +441,9 @@ - } - - // CHECK-LABEL: "default_collective_broadcast" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_collective_broadcast(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: "vhlo.collective_broadcast_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_broadcast_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -419,8 +454,9 @@ - } - - // CHECK-LABEL: "default_compare" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_compare(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: compare_type = #vhlo, - // CHECK-SAME: comparison_direction = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -431,8 +467,9 @@ - } - - // CHECK-LABEL: "default_convolution" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x6x6x16xf32> { -- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -460,8 +497,9 @@ - } - - // CHECK-LABEL: "default_custom_call" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_custom_call(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.custom_call_v1"(%arg0) <{ -+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{ - // CHECK-SAME: api_version = #vhlo, - // CHECK-SAME: backend_config = #vhlo.string_v1<"">, - // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">, -@@ -478,8 +516,9 @@ - } - - // CHECK-LABEL: "default_dot_general" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> { -- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, -@@ -498,8 +537,9 @@ - } - - // CHECK-LABEL: "default_dot" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.dot"(%arg0, %arg1) : (tensor<8x16xf32>, tensor<16x8xf32>) -> tensor<8x8xf32> -@@ -507,8 +547,9 @@ - } - - // CHECK-LABEL: 
"default_dynamic_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>>, - // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>> -@@ -520,8 +561,9 @@ - } - - // CHECK-LABEL: "default_dynamic_conv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> { -- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -549,8 +591,9 @@ - } - - // CHECK-LABEL: "default_dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> { -- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -576,15 +619,16 @@ - // CHECK-SAME: sym_name = #vhlo.string_v1<"default_func">, - // CHECK-SAME: sym_visibility = #vhlo.string_v1<""> - // CHECK-SAME: }> ({ -- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1): -- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1): -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : () -> () - func.return %arg0 : tensor - } - - // CHECK-LABEL: "dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> { -- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -605,8 +649,9 @@ - } - - // CHECK-LABEL: "default_infeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.infeed_v1"(%arg0) <{ -+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{ - // CHECK-SAME: infeed_config = #vhlo.string_v1<"">, - // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[]> - // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) -@@ -615,8 +660,9 @@ - } - - // CHECK-LABEL: "default_outfeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], 
%[[ARG1]]) <{ - // CHECK-SAME: outfeed_config = #vhlo.string_v1<""> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.outfeed"(%arg0, %arg1) : (tensor, !stablehlo.token) -> !stablehlo.token -@@ -624,8 +670,9 @@ - } - - // CHECK-LABEL: "default_recv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.recv_v1"(%arg0) <{ -+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -637,8 +684,9 @@ - } - - // CHECK-LABEL: "default_send" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -650,8 +698,9 @@ - } - - // CHECK-LABEL: "default_reduce_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64> -@@ -673,8 +722,9 @@ - } - - // CHECK-LABEL: "default_reduce_window" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x16x30x7xf32> { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, -@@ -696,8 +746,9 @@ - } - - // CHECK-LABEL: "default_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> { -- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, - // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>, -@@ -725,8 +776,9 @@ - } - - // CHECK-LABEL: "default_select_and_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<10x23x23x64xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> { -- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>> -@@ -754,8 +806,9 @@ - } - - // CHECK-LABEL: "default_sort" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func 
@default_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.sort_v1"(%arg0) <{
-+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{
- // CHECK-SAME: dimension = #vhlo.integer_v1<-1 : i64>
- // CHECK-SAME: is_stable = #vhlo.bool_v1
- // CHECK-SAME: }> ({
-@@ -774,29 +827,33 @@
- }
-
- // ============ OPS ============
-
- // CHECK-LABEL: "op_abs"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_abs(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_add"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_add(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_after_all"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_after_all(%arg0: !stablehlo.token) -> !stablehlo.token {
-- // CHECK: "vhlo.after_all_v1"(%arg0) : (!vhlo.token_v1) -> !vhlo.token_v1
-+ // CHECK: "vhlo.after_all_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> !vhlo.token_v1
- %0 = "stablehlo.after_all"(%arg0) : (!stablehlo.token) -> !stablehlo.token
- func.return %0 : !stablehlo.token
- }
-
- // CHECK-LABEL: "op_all_gather"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> {
-- // CHECK: "vhlo.all_gather_v1"(%arg0) <{
-+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{
- // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64>
- // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
-@@ -812,8 +869,9 @@
- }
-
- // CHECK-LABEL: "op_all_reduce"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_all_reduce(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.all_reduce_v1"(%arg0) <{
-+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
- // CHECK-SAME: use_global_device_ids = #vhlo.bool_v1
-@@ -836,7 +894,7 @@
-
- // CHECK-LABEL: "op_all_reduce_with_promotable_types"
- func.func @op_all_reduce_with_promotable_types(%operand: tensor) -> tensor {
-- // CHECK: "vhlo.all_reduce_v1"(%arg0)
-+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0:.*]])
- // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1):
- // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> ()
- // CHECK: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-@@ -854,8 +912,9 @@
- }
-
- // CHECK-LABEL: "op_all_to_all"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> {
-- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{
-+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>,
-@@ -873,22 +932,25 @@
- }
-
- // CHECK-LABEL: "op_and"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_and(%arg0: tensor, %arg1: tensor) -> tensor {
-- // CHECK:
"vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_atan2" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_atan2(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.atan2_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.atan2_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.atan2"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_batch_norm_grad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_batch_norm_grad(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16x16x16x16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { -- // CHECK: "vhlo.batch_norm_grad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ -+ // CHECK: "vhlo.batch_norm_grad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -@@ -900,8 +962,9 @@ - } - - // CHECK-LABEL: "op_batch_norm_inference" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_batch_norm_inference(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16xf32>) -> tensor<16x16x16x16xf32> { -- // CHECK: "vhlo.batch_norm_inference_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ -+ // CHECK: "vhlo.batch_norm_inference_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1> -@@ -913,8 +976,9 @@ - } - - // CHECK-LABEL: "op_batch_norm_training" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_batch_norm_training(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { -- // CHECK: "vhlo.batch_norm_training_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.batch_norm_training_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, 
!vhlo.tensor_v1<16x!vhlo.f32_v1>) -@@ -926,15 +990,17 @@ - } - - // CHECK-LABEL: "op_bitcast_convert" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_bitcast_convert(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.bitcast_convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.bitcast_convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.bitcast_convert"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_broadcast_in_dim(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.broadcast_in_dim_v1"(%arg0) <{ -+ // CHECK: "vhlo.broadcast_in_dim_v1"(%[[ARG0]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1> - %0 = "stablehlo.broadcast_in_dim"(%arg0) { -@@ -944,8 +1010,9 @@ - } - - // CHECK-LABEL: "op_broadcast" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_broadcast(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.broadcast_v1"(%arg0) <{ -+ // CHECK: "vhlo.broadcast_v1"(%[[ARG0]]) <{ - // CHECK-SAME: broadcast_sizes = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1> - %0 = "stablehlo.broadcast"(%arg0) { -@@ -955,9 +1022,10 @@ - } - - // CHECK-LABEL: "op_case" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_case(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.case_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.case"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -966,22 +1034,25 @@ - } - - // CHECK-LABEL: "op_cbrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cbrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cbrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.cbrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cbrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_ceil" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_ceil(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.ceil_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.ceil_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.ceil"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_cholesky" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> { -- // CHECK: "vhlo.cholesky_v1"(%arg0) <{ -+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{ - // CHECK-SAME: lower = #vhlo.bool_v1 - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1> - %0 = "stablehlo.cholesky"(%arg0) { -@@ -991,22 +1062,25 @@ - } - - // CHECK-LABEL: "op_clamp" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_clamp(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.clamp_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.clamp_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, 
!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.clamp"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_count_leading_zeros" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_count_leading_zeros(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.count_leading_zeros_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.count_leading_zeros_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.count_leading_zeros"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_collective_permute" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: "vhlo.collective_permute_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -1018,8 +1092,9 @@ - } - - // CHECK-LABEL: "op_compare" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_compare(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: compare_type = #vhlo, - // CHECK-SAME: comparison_direction = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -1031,15 +1106,17 @@ - } - - // CHECK-LABEL: "op_complex" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_complex(%arg0: tensor, %arg1: tensor) -> tensor> { -- // CHECK: "vhlo.complex_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.complex_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1> - %0 = "stablehlo.complex"(%arg0, %arg1) : (tensor, tensor) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "op_concatenate" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_concatenate(%arg0: tensor<8xf32>, %arg1: tensor<8xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.concatenate_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.concatenate_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x!vhlo.f32_v1>, !vhlo.tensor_v1<8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.concatenate"(%arg0, %arg1) { -@@ -1049,6 +1126,7 @@ - } - - // CHECK-LABEL: "op_constant" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_constant(%arg0: tensor) -> tensor { - // CHECK: "vhlo.constant_v1"() <{ - // CHECK-SAME: value = #vhlo.tensor_v1 : tensor> -@@ -1060,15 +1138,17 @@ - } - - // CHECK-LABEL: "op_convert" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_convert(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.convert"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_convolution" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x7x7x16xf32> { -- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: 
batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -1102,8 +1182,9 @@ - } - - // CHECK-LABEL: "op_cosine" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cosine(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cosine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.cosine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cosine"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } -@@ -1116,8 +1197,9 @@ - } - - // CHECK-LABEL: "op_cross_replica_sum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cross_replica_sum(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cross-replica-sum_v1"(%arg0) <{ -+ // CHECK: "vhlo.cross-replica-sum_v1"(%[[ARG0]]) <{ - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cross-replica-sum"(%arg0) { -@@ -1127,8 +1209,9 @@ - } - - // CHECK-LABEL: "op_custom_call" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_custom_call(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.custom_call_v1"(%arg0) <{ -+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{ - // CHECK-SAME: api_version = #vhlo, - // CHECK-SAME: backend_config = #vhlo.string_v1<"\08\03\1A\02">, - // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">, -@@ -1159,15 +1242,17 @@ - } - - // CHECK-LABEL: "op_divide" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_divide(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.divide_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.divide_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.divide"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dot_general" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> { -- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, -@@ -1187,8 +1272,9 @@ - } - - // CHECK-LABEL: "op_dot" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.dot"(%arg0, %arg1) { -@@ -1198,8 +1284,9 @@ - } - - // CHECK-LABEL: "op_dynamic_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // 
CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1213,8 +1300,9 @@ - } - - // CHECK-LABEL: "op_dynamic_conv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> { -- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -1248,8 +1336,9 @@ - } - - // CHECK-LABEL: "op_dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> { -- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -1269,8 +1358,9 @@ - } - - // CHECK-LABEL: "op_dynamic_iota" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_dynamic_iota(%arg0: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_iota_v1"(%arg0) <{ -+ // CHECK: "vhlo.dynamic_iota_v1"(%[[ARG0]]) <{ - // CHECK-SAME: iota_dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_iota"(%arg0) { -@@ -1280,22 +1370,25 @@ - } - - // CHECK-LABEL: "op_dynamic_pad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_dynamic_pad(%arg0: tensor, %arg1: tensor, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>, %arg4: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_pad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.dynamic_pad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_pad"(%arg0, %arg1, %arg2, %arg3, %arg4) : (tensor, tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dynamic_reshape" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_reshape(%arg0: tensor<16xf32>, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_reshape_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.dynamic_reshape_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_reshape"(%arg0, %arg1) : (tensor<16xf32>, tensor<2xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dynamic_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_slice(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor<4xf32> { -- // 
CHECK: "vhlo.dynamic_slice_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_slice_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: slice_sizes = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<4x!vhlo.f32_v1> - %0 = "stablehlo.dynamic_slice"(%arg0, %arg1) { -@@ -1305,15 +1398,17 @@ - } - - // CHECK-LABEL: "op_dynamic_update_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_update_slice(%arg0: tensor<16xf32>, %arg1: tensor<4xf32>, %arg2: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.dynamic_update_slice_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> -+ // CHECK: "vhlo.dynamic_update_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.dynamic_update_slice"(%arg0, %arg1, %arg2) : (tensor<16xf32>, tensor<4xf32>, tensor) -> tensor<16xf32> - func.return %0 : tensor<16xf32> - } - - // CHECK-LABEL: "op_einsum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_einsum(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.einsum_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.einsum_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab,bc->ac"> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.einsum"(%arg0, %arg1) { -@@ -1323,22 +1418,25 @@ - } - - // CHECK-LABEL: "op_exponential_minus_one" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_exponential_minus_one(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.exponential_minus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.exponential_minus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.exponential_minus_one"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_exponential" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_exponential(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.exponential_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.exponential_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.exponential"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_fft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { -- // CHECK: "vhlo.fft_v1"(%arg0) <{ -+ // CHECK: "vhlo.fft_v1"(%[[ARG0]]) <{ - // CHECK-SAME: fft_length = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: fft_type = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.complex_v1>) -> !vhlo.tensor_v1<16x!vhlo.complex_v1> -@@ -1350,8 +1448,9 @@ - } - - // CHECK-LABEL: "op_floor" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_floor(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.floor_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.floor_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.floor"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } -@@ -1364,16 +1463,17 @@ - // CHECK-SAME: sym_name = #vhlo.string_v1<"op_func">, - // CHECK-SAME: sym_visibility = #vhlo.string_v1<"private"> - // CHECK-SAME: }> ({ -- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: 
!vhlo.tensor_v1): -- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1): -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : () -> () - - func.return %arg0 : tensor - } - - // CHECK-LABEL: "op_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> { -- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -1395,8 +1495,9 @@ - } - - // CHECK-LABEL: "op_get_dimension_size" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_get_dimension_size(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.get_dimension_size_v1"(%arg0) <{ -+ // CHECK: "vhlo.get_dimension_size_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.get_dimension_size"(%arg0) { -@@ -1406,8 +1507,9 @@ - } - - // CHECK-LABEL: "op_get_tuple_element" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_get_tuple_element(%arg0: tuple, tensor>) -> tensor { -- // CHECK: "vhlo.get_tuple_element_v1"(%arg0) <{ -+ // CHECK: "vhlo.get_tuple_element_v1"(%[[ARG0]]) <{ - // CHECK-SAME: index = #vhlo.integer_v1<0 : i32> - // CHECK-SAME: }> : (!vhlo.tuple_v1, !vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.get_tuple_element"(%arg0) { -@@ -1417,11 +1519,12 @@ - } - - // CHECK-LABEL: "op_if" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_if(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.if_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.if_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }, { -- // CHECK-NEXT: "vhlo.return_v1"(%arg2) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG2]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.if"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -1432,15 +1535,17 @@ - } - - // CHECK-LABEL: "op_imag" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_imag(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.imag_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.imag_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.imag"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_infeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.infeed_v1"(%arg0) <{ -+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{ - // CHECK-SAME: infeed_config = #vhlo.string_v1<"foo">, - // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[#vhlo.array_v1<[]>]> - // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) -@@ -1463,36 +1568,41 @@ - } - - // CHECK-LABEL: "op_is_finite" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_is_finite(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.is_finite_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.is_finite_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 
= "stablehlo.is_finite"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_log" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_log(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.log_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.log_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.log"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_log_plus_one" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_log_plus_one(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.log_plus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.log_plus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.log_plus_one"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_logistic" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_logistic(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.logistic_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.logistic_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.logistic"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_map" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_map(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.map_v1"(%arg0) <{ -+ // CHECK: "vhlo.map_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> ({ - // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1): -@@ -1510,57 +1620,65 @@ - } - - // CHECK-LABEL: "op_maximum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_maximum(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.maximum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.maximum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.maximum"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_minimum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_minimum(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.minimum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.minimum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.minimum"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_multiply" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_multiply(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.multiply_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.multiply_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.multiply"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_negate" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_negate(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.negate_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.negate_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.negate"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_not" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_not(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.not_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ 
// CHECK: "vhlo.not_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.not"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_optimization_barrier" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_optimization_barrier(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.optimization_barrier_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.optimization_barrier_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.optimization_barrier"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_or" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_or(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.or_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.or_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.or"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_outfeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: outfeed_config = #vhlo.string_v1<"foo"> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.outfeed"(%arg0, %arg1) { -@@ -1570,8 +1688,9 @@ - } - - // CHECK-LABEL: "op_pad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_pad(%arg0: tensor<8xf32>, %arg1: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.pad_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.pad_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: edge_padding_high = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: edge_padding_low = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: interior_padding = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1585,36 +1704,41 @@ - } - - // CHECK-LABEL: "op_popcnt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_popcnt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.popcnt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.popcnt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.popcnt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_power" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_power(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.power_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.power_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.power"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_real_dynamic_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}) - func.func @op_real_dynamic_slice(%arg0: tensor, %arg1: tensor<1xindex>, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.real_dynamic_slice_v1"(%arg0, %arg1, %arg2, %arg3) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.real_dynamic_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) 
-> !vhlo.tensor_v1
- %0 = "stablehlo.real_dynamic_slice"(%arg0, %arg1, %arg2, %arg3) : (tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_real"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_real(%arg0: tensor>) -> tensor {
-- // CHECK: "vhlo.real_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
-+ // CHECK: "vhlo.real_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
- %0 = "stablehlo.real"(%arg0) : (tensor>) -> tensor
- func.return %0 : tensor
- }
-
- // CHECK-LABEL: "op_recv"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) {
-- // CHECK: "vhlo.recv_v1"(%arg0) <{
-+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
- // CHECK-SAME: channel_type = #vhlo.integer_v1<3 : i64>,
- // CHECK-SAME: is_host_transfer = #vhlo.bool_v1
-@@ -1627,8 +1751,9 @@
- }
-
- // CHECK-LABEL: "op_reduce"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @op_reduce(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor {
-- // CHECK: "vhlo.reduce_v1"(%arg0, %arg1)
-+ // CHECK: "vhlo.reduce_v1"(%[[ARG0]], %[[ARG1]])
- // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1):
- // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> ()
- // CHECK: }) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1
-@@ -1643,8 +1768,9 @@
- }
-
- // CHECK-LABEL: "op_reduce_precision"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_reduce_precision(%arg0: tensor) -> tensor {
-- // CHECK: "vhlo.reduce_precision_v1"(%arg0) <{
-+ // CHECK: "vhlo.reduce_precision_v1"(%[[ARG0]]) <{
- // CHECK-SAME: exponent_bits = #vhlo.integer_v1<8 : i32>
- // CHECK-SAME: mantissa_bits = #vhlo.integer_v1<10 : i32>
- // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
-@@ -1658,7 +1784,7 @@
- // CHECK-LABEL: "op_reduce_with_promotable_types"
- func.func @op_reduce_with_promotable_types(%arg0: tensor<4x4xf32>, %arg1 : tensor)
- -> (tensor<4xf64>) {
-- // CHECK: "vhlo.reduce_v1"(%arg0, %arg1)
-+ // CHECK: "vhlo.reduce_v1"(%[[ARG0:.*]], %[[ARG1:.*]])
- // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1):
- // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> ()
- // CHECK: }) : (!vhlo.tensor_v1<4x4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<4x!vhlo.f64_v1>
-@@ -1673,8 +1799,9 @@
- }
-
- // CHECK-LABEL: "op_reduce_scatter"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
- func.func @op_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{
-+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{
- // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>,
- // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
- // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64>
-@@ -1699,7 +1826,7 @@
-
- // CHECK-LABEL: "op_reduce_scatter_with_promotable_types"
- func.func @op_reduce_scatter_with_promotable_types(%data: tensor<4x16xf32>) -> tensor<4x4xf64> {
-- // CHECK: "vhlo.reduce_scatter_v1"(%arg0)
-+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0:.*]])
- // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1):
- // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> ()
- // CHECK: }) : (!vhlo.tensor_v1<4x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f64_v1>
-@@ -1716,8 +1843,9 @@
-
-
- // CHECK-LABEL: 
"op_reduce_window" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x9x16x7xf32> { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, -@@ -1742,11 +1870,11 @@ - func.return %0 : tensor<2x9x16x7xf32> - } - --// CHECK_lABEL: "op_reduce_window_with_promotable_types" -+// CHECK-LABEL: "op_reduce_window_with_promotable_types" - func.func @op_reduce_window_with_promotable_types(%arg0: tensor<4x2xf32>, - %arg1: tensor<4x2xf32>, %init0: tensor, %init1: tensor) -> - (tensor<2x2xf64>, tensor<2x2xf32>) { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1, %arg2, %arg3) -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0:.*]], %[[ARG1:.*]], %[[ARG2:.*]], %[[ARG3:.*]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1, %[[ARG3:arg.*]]: !vhlo.tensor_v1, %[[ARG4:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]], %[[VAL2:.*]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1<4x2x!vhlo.f32_v1>, !vhlo.tensor_v1<4x2x!vhlo.f32_v1>, !vhlo.tensor_v1, !vhlo.tensor_v1) -> (!vhlo.tensor_v1<2x2x!vhlo.f64_v1>, !vhlo.tensor_v1<2x2x!vhlo.f32_v1>) -@@ -1766,8 +1894,9 @@ - } - - // CHECK-LABEL: "op_remainder" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_remainder(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.remainder_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.remainder_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.remainder"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } -@@ -1787,16 +1916,18 @@ - } - - // CHECK-LABEL: "op_reshape" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reshape(%arg0: tensor<16xf32>) -> tensor<4x4xf32> { -- // CHECK: "vhlo.reshape_v1"(%arg0) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1> -+ // CHECK: "vhlo.reshape_v1"(%[[ARG0]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1> - %0 = "stablehlo.reshape"(%arg0) : (tensor<16xf32>) -> tensor<4x4xf32> - func.return %0 : tensor<4x4xf32> - } - - // CHECK-LABEL: "op_return" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_return(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.case_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.case"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -1805,8 +1936,9 @@ - } - - // CHECK-LABEL: "op_reverse" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reverse(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reverse_v1"(%arg0) <{ -+ // CHECK: "vhlo.reverse_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.reverse"(%arg0) { -@@ -1816,8 +1948,9 @@ - } - - // CHECK-LABEL: "op_rng_bit_generator" -+// CHECK-NEXT: 
(%[[ARG0:.*]]: {{.*}}) - func.func @op_rng_bit_generator(%arg0: tensor) -> (tensor, tensor) { -- // CHECK: "vhlo.rng_bit_generator_v1"(%arg0) <{ -+ // CHECK: "vhlo.rng_bit_generator_v1"(%[[ARG0]]) <{ - // CHECK-SAME: rng_algorithm = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> (!vhlo.tensor_v1, !vhlo.tensor_v1) - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { -@@ -1827,8 +1960,9 @@ - } - - // CHECK-LABEL: "op_rng" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_rng(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { -- // CHECK: "vhlo.rng_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.rng_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: rng_distribution = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<0x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) { -@@ -1838,29 +1972,33 @@ - } - - // CHECK-LABEL: "op_round_nearest_afz" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_round_nearest_afz(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.round_nearest_afz_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.round_nearest_afz_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.round_nearest_afz"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_round_nearest_even" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_round_nearest_even(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.round_nearest_even_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.round_nearest_even_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.round_nearest_even"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_rsqrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_rsqrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.rsqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.rsqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.rsqrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> { -- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, - // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>, -@@ -1893,7 +2031,7 @@ - func.func @op_scatter_with_promotable_types(%input_tensor: tensor<200x100x300xf32>, - %scatter_indices: tensor<10x2xi32>, %updates: tensor<10x300xf32>) -> - tensor<200x100x300xf64> { -- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) -+ // CHECK: "vhlo.scatter_v1"(%[[ARG0:.*]], %[[ARG1:.*]], %[[ARG2:.*]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1<200x100x300x!vhlo.f32_v1>, !vhlo.tensor_v1<10x2x!vhlo.i32_v1>, !vhlo.tensor_v1<10x300x!vhlo.f32_v1>) -> !vhlo.tensor_v1<200x100x300x!vhlo.f64_v1> -@@ -1916,8 +2054,9 @@ - } - - // CHECK-LABEL: "op_select_and_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func 
@op_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<12x13x13x66xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> { -- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>> -@@ -1947,8 +2086,9 @@ - } - - // CHECK-LABEL: "op_select_and_scatter_with_promotable_types" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_select_and_scatter_with_promotable_types(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<12x13x13x66xf32>, %arg2: tensor) -> tensor<10x24x24x64xf64> { -- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) -+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: %[[VAL:.*]] = "vhlo.add_v1"(%[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - // CHECK: "vhlo.return_v1"(%[[VAL]]) : (!vhlo.tensor_v1) -> () -@@ -1970,15 +2110,17 @@ - } - - // CHECK-LABEL: "op_select" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_select(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.select_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.select_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.select"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_send" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -1991,8 +2133,9 @@ - } - - // CHECK-LABEL: "op_set_dimension_size" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_set_dimension_size(%arg0: tensor, %arg1: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.set_dimension_size_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.set_dimension_size_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.set_dimension_size"(%arg0, %arg1) { -@@ -2002,43 +2145,49 @@ - } - - // CHECK-LABEL: "op_shift_left" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_left(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.shift_left_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_left_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_left"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_shift_right_arithmetic" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_right_arithmetic(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: 
"vhlo.shift_right_arithmetic_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_right_arithmetic_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_right_arithmetic"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_shift_right_logical" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_right_logical(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.shift_right_logical_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_right_logical_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_right_logical"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_sign" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sign(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.sign_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.sign_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.sign"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_sine" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sine(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.sine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.sine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.sine"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_slice(%arg0: tensor<16xf32>) -> tensor<4xf32> { -- // CHECK: "vhlo.slice_v1"(%arg0) <{ -+ // CHECK: "vhlo.slice_v1"(%[[ARG0]]) <{ - // CHECK-SAME: limit_indices = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: start_indices = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: strides = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -2052,8 +2201,9 @@ - } - - // CHECK-LABEL: "op_sort" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.sort_v1"(%arg0) <{ -+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: is_stable = #vhlo.bool_v1 - // CHECK-SAME: }> ({ -@@ -2073,29 +2223,33 @@ - } - - // CHECK-LABEL: "op_sqrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sqrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.sqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.sqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.sqrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_subtract" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_subtract(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.subtract_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.subtract_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.subtract"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_tanh" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_tanh(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.tanh_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.tanh_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.tanh"(%arg0) : (tensor) -> tensor - func.return %0 : tensor 
- } - - // CHECK-LABEL: "op_torch_index_select" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_torch_index_select(%arg0: tensor<5x1x5xf32>, %arg1: tensor<2xi32>) -> tensor<2x1x5xf32> { -- // CHECK: "vhlo.torch_index_select_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.torch_index_select_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_dims = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: dim = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<5x1x5x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.i32_v1>) -> !vhlo.tensor_v1<2x1x5x!vhlo.f32_v1> -@@ -2107,8 +2261,9 @@ - } - - // CHECK-LABEL: "op_trace" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_trace(%arg0: tensor) { -- // CHECK: "vhlo.trace_v1"(%arg0) <{ -+ // CHECK: "vhlo.trace_v1"(%[[ARG0]]) <{ - // CHECK-SAME: tag = #vhlo.string_v1<"foo"> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> () - "stablehlo.trace"(%arg0) { -@@ -2118,8 +2273,9 @@ - } - - // CHECK-LABEL: "op_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_transpose(%arg0: tensor<16x8xf32>) -> tensor<8x16xf32> { -- // CHECK: "vhlo.transpose_v1"(%arg0) <{ -+ // CHECK: "vhlo.transpose_v1"(%[[ARG0]]) <{ - // CHECK-SAME: permutation = #vhlo.tensor_v1 : tensor<2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x16x!vhlo.f32_v1> - %0 = "stablehlo.transpose"(%arg0) { -@@ -2129,8 +2285,9 @@ - } - - // CHECK-LABEL: "op_triangular_solve" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_triangular_solve(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.triangular_solve_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.triangular_solve_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: left_side = #vhlo.bool_v1, - // CHECK-SAME: lower = #vhlo.bool_v1, - // CHECK-SAME: transpose_a = #vhlo, -@@ -2146,15 +2303,17 @@ - } - - // CHECK-LABEL: "op_tuple" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_tuple(%arg0: tensor) -> tuple> { -- // CHECK: "vhlo.tuple_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1> -+ // CHECK: "vhlo.tuple_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1> - %0 = "stablehlo.tuple"(%arg0) : (tensor) -> tuple> - func.return %0 : tuple> - } - - // CHECK-LABEL: "op_unary_einsum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_unary_einsum(%arg0: tensor<8x16xf32>) -> tensor<8xf32> { -- // CHECK: "vhlo.unary_einsum_v1"(%arg0) <{ -+ // CHECK: "vhlo.unary_einsum_v1"(%[[ARG0]]) <{ - // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab->a"> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x!vhlo.f32_v1> - %0 = "stablehlo.unary_einsum"(%arg0) { -@@ -2164,22 +2323,25 @@ - } - - // CHECK-LABEL: "op_uniform_dequantize" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_uniform_dequantize(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.uniform_dequantize_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.uniform_dequantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.uniform_dequantize"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_uniform_quantize" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_uniform_quantize(%arg0: tensor) -> tensor> { -- // CHECK: "vhlo.uniform_quantize_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.uniform_quantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1> - %0 = "stablehlo.uniform_quantize"(%arg0) : (tensor) -> tensor> - func.return %0 : 
tensor> - } - - // CHECK-LABEL: "op_while" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_while(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.while_v1"(%arg0) ({ -+ // CHECK: "vhlo.while_v1"(%[[ARG0]]) ({ - // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1): - // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }, { -@@ -2197,8 +2359,9 @@ - } - - // CHECK-LABEL: "op_xor" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_xor(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.xor_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.xor_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.xor"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } -@@ -2206,190 +2369,217 @@ - // ============ TYPES ============ - - // CHECK-LABEL: "type_i1" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i1(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i4" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i4(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i8" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i8(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i32(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i64(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = 
"stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui4" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui4(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui8" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui8(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui32(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui64(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E4M3FN" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E4M3FN(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E5M2" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E5M2(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E4M3FNUZ" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E4M3FNUZ(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> 
!vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E4M3B11FNUZ" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E4M3B11FNUZ(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E5M2FNUZ" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E5M2FNUZ(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_bf16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_bf16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f32(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f64(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_complex_f32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_complex_f32(%arg0: tensor>, %arg1: tensor>) -> tensor> { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "type_complex_f64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, 
%[[ARG1:.*]]: {{.*}}) - func.func @type_complex_f64(%arg0: tensor>, %arg1: tensor>) -> tensor> { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "type_dynamism_ranked" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_dynamism_ranked(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_quantization" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_quantization(%arg0: tensor>, %arg1: tensor>) -> tensor> { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor> - func.return %0 : tensor> - } - - // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>> - // CHECK-LABEL: "type_token_callee" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_token_callee(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.token_v1) -> () -+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> () - return %arg0 : !stablehlo.token - } - - // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>> - // CHECK-LABEL: "type_token_caller" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_token_caller(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.call_v1"(%arg0) <{callee = #vhlo.string_v1<"type_token_callee">} -+ // CHECK: "vhlo.call_v1"(%[[ARG0]]) <{callee = #vhlo.string_v1<"type_token_callee">} - // CHECK-SAME: (!vhlo.token_v1) -> !vhlo.token_v1 - %0 = func.call @type_token_callee(%arg0) : (!stablehlo.token) -> !stablehlo.token - return %0 : !stablehlo.token - } - - // CHECK-LABEL: "type_tuple" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_tuple(%arg0: tuple>) -> tuple { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo" -diff --ruN a/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_18_0.mlir b/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_18_0.mlir ---- stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_18_0.mlir -+++ stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_18_0.mlir -@@ -13,6 +13,7 @@ - // ============ ATTRIBUTES ============ - - // CHECK-LABEL: "attr_comparison_direction_eq" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_eq(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -22,6 +23,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_ne" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_ne(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -31,6 +33,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_ge" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_ge(%arg0: tensor, %arg1: 
tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -40,6 +43,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_gt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_gt(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -49,6 +53,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_le" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_le(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -58,6 +63,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_lt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_lt(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -67,6 +73,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_notype" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_notype(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo -@@ -76,6 +83,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_float" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_float(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -86,6 +94,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_totalorder" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_totalorder(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -96,6 +105,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_signed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_signed(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -106,6 +116,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_unsigned" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_unsigned(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -118,6 +129,7 @@ - // ConvDimensionNumbers aka #stablehlo.conv is covered below. 
- - // CHECK-LABEL: "attr_custom_call_api_version_unspecified" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_unspecified(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -128,6 +140,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_original" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_original(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -138,6 +151,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_status_returning" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_status_returning(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -148,6 +162,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_status_returning_unified" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_status_returning_unified(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -166,6 +181,7 @@ - // DotDimensionNumbers aka #stablehlo.dot is covered below. - - // CHECK-LABEL: "attr_fft_type_fft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -176,6 +192,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_ifft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_ifft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -186,6 +203,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_rfft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_rfft(%arg0: tensor<16xf32>) -> tensor<9xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -196,6 +214,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_irfft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_irfft(%arg0: tensor<9xcomplex>) -> tensor<16xf32> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -208,6 +227,7 @@ - // GatherDimensionNumbers aka #stablehlo.gather is covered below. 
 
 // CHECK-LABEL: "attr_precision_config_default"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @attr_precision_config_default(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
   %0 = "stablehlo.dot"(%arg0, %arg1) {
     // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
@@ -216,6 +236,7 @@
 }
 
 // CHECK-LABEL: "attr_precision_config_high"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @attr_precision_config_high(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
   %0 = "stablehlo.dot"(%arg0, %arg1) {
     // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
@@ -225,6 +246,7 @@
 }
 
 // CHECK-LABEL: "attr_precision_config_highest"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @attr_precision_config_highest(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
   %0 = "stablehlo.dot"(%arg0, %arg1) {
     // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
@@ -234,6 +256,7 @@
 }
 
 // CHECK-LABEL: "attr_rng_algorithm_default"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @attr_rng_algorithm_default(%arg0: tensor) -> (tensor, tensor) {
   %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
     // CHECK: rng_algorithm = #vhlo
@@ -243,6 +266,7 @@
 }
 
 // CHECK-LABEL: "attr_rng_algorithm_three_fry"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @attr_rng_algorithm_three_fry(%arg0: tensor) -> (tensor, tensor) {
   %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
     // CHECK: rng_algorithm = #vhlo
@@ -252,6 +276,7 @@
 }
 
 // CHECK-LABEL: "attr_rng_algorithm_philox"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @attr_rng_algorithm_philox(%arg0: tensor) -> (tensor, tensor) {
   %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
     // CHECK: rng_algorithm = #vhlo
@@ -261,6 +286,7 @@
 }
 
 // CHECK-LABEL: "attr_rng_distribution_uniform"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
 func.func @attr_rng_distribution_uniform(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor {
   %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) {
     // CHECK: rng_distribution = #vhlo
@@ -270,6 +296,7 @@
 }
 
 // CHECK-LABEL: "attr_rng_distribution_normal"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
 func.func @attr_rng_distribution_normal(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor {
   %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) {
     // CHECK: rng_distribution = #vhlo
@@ -281,6 +308,7 @@
 // ScatterDimensionNumbers aka #stablehlo.scatter is covered below.
 
 // CHECK-LABEL: "attr_transpose_no_transpose"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @attr_transpose_no_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
   %0 = "stablehlo.triangular_solve"(%arg0, %arg1) {
     left_side = true,
@@ -293,6 +321,7 @@
 }
 
 // CHECK-LABEL: "attr_transpose_transpose"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @attr_transpose_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
   %0 = "stablehlo.triangular_solve"(%arg0, %arg1) {
     left_side = true,
@@ -305,6 +334,7 @@
 }
 
 // CHECK-LABEL: "attr_transpose_adjoint"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @attr_transpose_adjoint(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> {
   %0 = "stablehlo.triangular_solve"(%arg0, %arg1) {
     left_side = true,
@@ -319,10 +349,9 @@
 // TypeExtensionsAttr aka #stablehlo.type_extensions is covered below.
 
 // CHECK-LABEL: "attr_type_extensions_bounds"
-func.func @attr_type_extensions_bounds(
-    %arg0: tensor>)
-    -> tensor> {
-  // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1>) -> ()
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
+func.func @attr_type_extensions_bounds(%arg0: tensor>) -> tensor> {
+  // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> ()
   func.return %arg0 : tensor>
 }
 
@@ -330,8 +359,9 @@
 // ============ DEFAULTS ============
 
 // CHECK-LABEL: "default_all_gather"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @default_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> {
-  // CHECK: "vhlo.all_gather_v1"(%arg0) <{
+  // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{
   // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64>
   // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
   // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
@@ -345,8 +375,9 @@
 }
 
 // CHECK-LABEL: "default_all_reduce"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @default_all_reduce(%arg0: tensor) -> tensor {
-  // CHECK: "vhlo.all_reduce_v1"(%arg0)
+  // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]])
   // CHECK-SAME: <{
   // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
   // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
@@ -368,8 +399,9 @@
 }
 
 // CHECK-LABEL: "default_all_to_all"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @default_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> {
-  // CHECK: "vhlo.all_to_all_v1"(%arg0) <{
+  // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{
   // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
   // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>,
   // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>,
@@ -386,8 +418,9 @@
 }
 
 // CHECK-LABEL: "default_cholesky"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @default_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> {
-  // CHECK: "vhlo.cholesky_v1"(%arg0) <{
+  // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{
   // CHECK-SAME: lower = #vhlo.bool_v1
   // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>
   %0 = "stablehlo.cholesky"(%arg0) : (tensor<1x16x16xf32>) -> tensor<1x16x16xf32>
@@ -395,8 +428,9 @@
 }
 
 // CHECK-LABEL: "default_collective_permute"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @default_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> {
-  // CHECK: "vhlo.collective_permute_v1"(%arg0) <{
+  // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{
   // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
   // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>>
   // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1>
@@ -407,8 +441,9 @@
 }
 
 // CHECK-LABEL: "default_collective_broadcast"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @default_collective_broadcast(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> {
-  // CHECK: "vhlo.collective_broadcast_v1"(%arg0) <{
+  // CHECK: "vhlo.collective_broadcast_v1"(%[[ARG0]]) <{
   // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
   // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x2xi64>>
   // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1>
@@ -419,8 +454,9 @@
 }
 
 // CHECK-LABEL: "default_compare"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @default_compare(%arg0: tensor, %arg1: tensor) -> tensor {
-  // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{
+  // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{
   // CHECK-SAME: compare_type = #vhlo,
   // CHECK-SAME: comparison_direction = #vhlo
   // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
@@ -431,8 +467,9 @@
 }
 
 // CHECK-LABEL: "default_convolution"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @default_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x6x6x16xf32> {
-  // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{
+  // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{
   // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>,
   // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>,
   // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>,
@@ -460,8 +497,9 @@
 }
 
 // CHECK-LABEL: "default_custom_call"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @default_custom_call(%arg0: tensor) -> tensor {
-  // CHECK: "vhlo.custom_call_v1"(%arg0) <{
+  // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{
   // CHECK-SAME: api_version = #vhlo,
   // CHECK-SAME: backend_config = #vhlo.string_v1<"">,
   // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">,
@@ -478,8 +516,9 @@
 }
 
 // CHECK-LABEL: "default_dot_general"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @default_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> {
-  // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{
+  // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{
   // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>,
   // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>,
   // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>,
@@ -498,8 +537,9 @@
 }
 
 // CHECK-LABEL: "default_dot"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @default_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
-  // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{
+  // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{
  // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
   // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1>
   %0 = "stablehlo.dot"(%arg0, %arg1) : (tensor<8x16xf32>, tensor<16x8xf32>) -> tensor<8x8xf32>
@@ -507,8 +547,9 @@
 }
 
 // CHECK-LABEL: "default_dynamic_broadcast_in_dim"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @default_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor {
-  // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{
+  // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{
   // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>,
   // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>>,
   // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>>
@@ -520,8 +561,9 @@
 }
 
 // CHECK-LABEL: "default_dynamic_conv"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
 func.func @default_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> {
-  // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{
+  // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
   // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>,
   // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>,
   // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>,
@@ -549,8 +591,9 @@
 }
 
 // CHECK-LABEL: "default_dynamic_gather"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
 func.func @default_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> {
-  // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{
+  // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
   // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
   // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>,
   // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
@@ -576,15 +619,16 @@
   // CHECK-SAME: sym_name = #vhlo.string_v1<"default_func">,
   // CHECK-SAME: sym_visibility = #vhlo.string_v1<"">
   // CHECK-SAME: }> ({
-  // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1):
-  // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> ()
+  // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1):
+  // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> ()
   // CHECK-NEXT: }) : () -> ()
   func.return %arg0 : tensor
 }
 
 // CHECK-LABEL: "dynamic_gather"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> {
-  // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{
+  // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{
   // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
   // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>,
   // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
@@ -605,8 +649,9 @@
 }
 
 // CHECK-LABEL: "default_infeed"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @default_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) {
-  // CHECK: "vhlo.infeed_v1"(%arg0) <{
+  // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{
   // CHECK-SAME: infeed_config = #vhlo.string_v1<"">,
   // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[]>
   // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1)
@@ -615,8 +660,9 @@
 }
 
 // CHECK-LABEL: "default_outfeed"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @default_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token {
-  // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{
+  // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{
   // CHECK-SAME: outfeed_config = #vhlo.string_v1<"">
   // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1
   %0 = "stablehlo.outfeed"(%arg0, %arg1) : (tensor, !stablehlo.token) -> !stablehlo.token
@@ -624,8 +670,9 @@
 }
 
 // CHECK-LABEL: "default_recv"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @default_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) {
-  // CHECK: "vhlo.recv_v1"(%arg0) <{
+  // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{
   // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
   // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>,
   // CHECK-SAME: is_host_transfer = #vhlo.bool_v1
@@ -637,8 +684,9 @@
 }
 
 // CHECK-LABEL: "default_send"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @default_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token {
-  // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{
+  // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{
   // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
   // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>,
   // CHECK-SAME: is_host_transfer = #vhlo.bool_v1
@@ -650,8 +698,9 @@
 }
 
 // CHECK-LABEL: "default_reduce_scatter"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @default_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-  // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{
+  // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{
   // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
   // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
   // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64>
@@ -673,8 +722,9 @@
 }
 
 // CHECK-LABEL: "default_reduce_window"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @default_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x16x30x7xf32> {
-  // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{
+  // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{
   // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>,
   // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>,
   // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>,
@@ -696,8 +746,9 @@
 }
 
 // CHECK-LABEL: "default_scatter"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
 func.func @default_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> {
-  // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{
+  // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
   // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>,
   // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
   // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
@@ -725,8 +776,9 @@
 }
 
 // CHECK-LABEL: "default_select_and_scatter"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
 func.func @default_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<10x23x23x64xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> {
-  // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{
+  // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
   // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>,
   // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>,
   // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>>
@@ -754,8 +806,9 @@
 }
 
 // CHECK-LABEL: "default_sort"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @default_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-  // CHECK: "vhlo.sort_v1"(%arg0) <{
+  // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{
   // CHECK-SAME: dimension = #vhlo.integer_v1<-1 : i64>
   // CHECK-SAME: is_stable = #vhlo.bool_v1
   // CHECK-SAME: }> ({
@@ -774,29 +827,33 @@
 // ============ OPS ============
 
 // CHECK-LABEL: "op_abs"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_abs(%arg0: tensor) -> tensor {
-  // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
   %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor
   func.return %0 : tensor
 }
 
 // CHECK-LABEL: "op_add"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @op_add(%arg0: tensor, %arg1: tensor) -> tensor {
-  // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
   %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
   func.return %0 : tensor
 }
 
 // CHECK-LABEL: "op_after_all"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_after_all(%arg0: !stablehlo.token) -> !stablehlo.token {
-  // CHECK: "vhlo.after_all_v1"(%arg0) : (!vhlo.token_v1) -> !vhlo.token_v1
+  // CHECK: "vhlo.after_all_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> !vhlo.token_v1
   %0 = "stablehlo.after_all"(%arg0) : (!stablehlo.token) -> !stablehlo.token
   func.return %0 : !stablehlo.token
 }
 
 // CHECK-LABEL: "op_all_gather"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> {
-  // CHECK: "vhlo.all_gather_v1"(%arg0) <{
+  // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{
   // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64>
   // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>,
   // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
@@ -812,8 +869,9 @@
 }
 
 // CHECK-LABEL: "op_all_reduce"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_all_reduce(%arg0: tensor) -> tensor {
-  // CHECK: "vhlo.all_reduce_v1"(%arg0) <{
+  // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]]) <{
   // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>,
   // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
   // CHECK-SAME: use_global_device_ids = #vhlo.bool_v1
@@ -836,7 +894,7 @@
 
 // CHECK-LABEL: "op_all_reduce_with_promotable_types"
 func.func @op_all_reduce_with_promotable_types(%operand: tensor) -> tensor {
-  // CHECK: "vhlo.all_reduce_v1"(%arg0)
+  // CHECK: "vhlo.all_reduce_v1"(%[[ARG0:.*]])
   // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1):
   // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> ()
   // CHECK: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
@@ -854,8 +912,9 @@
 }
 
 // CHECK-LABEL: "op_all_to_all"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> {
-  // CHECK: "vhlo.all_to_all_v1"(%arg0) <{
+  // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{
   // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>,
   // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>,
   // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>,
@@ -873,22 +932,25 @@
 }
 
 // CHECK-LABEL: "op_and"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @op_and(%arg0: tensor, %arg1: tensor) -> tensor {
-  // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
   %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor
   func.return %0 : tensor
 }
 
 // CHECK-LABEL: "op_atan2"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @op_atan2(%arg0: tensor, %arg1: tensor) -> tensor {
-  // CHECK: "vhlo.atan2_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.atan2_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
  %0 = "stablehlo.atan2"(%arg0, %arg1) : (tensor, tensor) -> tensor
   func.return %0 : tensor
 }
 
 // CHECK-LABEL: "op_batch_norm_grad"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}})
 func.func @op_batch_norm_grad(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16x16x16x16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) {
-  // CHECK: "vhlo.batch_norm_grad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{
+  // CHECK: "vhlo.batch_norm_grad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{
   // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>,
   // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64>
   // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>)
@@ -900,8 +962,9 @@
 }
 
 // CHECK-LABEL: "op_batch_norm_inference"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}})
 func.func @op_batch_norm_inference(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16xf32>) -> tensor<16x16x16x16xf32> {
-  // CHECK: "vhlo.batch_norm_inference_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{
+  // CHECK: "vhlo.batch_norm_inference_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{
   // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>,
   // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64>
   // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>
@@ -913,8 +976,9 @@
 }
 
 // CHECK-LABEL: "op_batch_norm_training"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
 func.func @op_batch_norm_training(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) {
-  // CHECK: "vhlo.batch_norm_training_v1"(%arg0, %arg1, %arg2) <{
+  // CHECK: "vhlo.batch_norm_training_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
   // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>,
   // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64>
   // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>)
@@ -926,15 +990,17 @@
 }
 
 // CHECK-LABEL: "op_bitcast_convert"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_bitcast_convert(%arg0: tensor) -> tensor {
-  // CHECK: "vhlo.bitcast_convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.bitcast_convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
   %0 = "stablehlo.bitcast_convert"(%arg0) : (tensor) -> tensor
   func.return %0 : tensor
 }
 
 // CHECK-LABEL: "op_broadcast_in_dim"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_broadcast_in_dim(%arg0: tensor<16xf32>) -> tensor<16x16xf32> {
-  // CHECK: "vhlo.broadcast_in_dim_v1"(%arg0) <{
+  // CHECK: "vhlo.broadcast_in_dim_v1"(%[[ARG0]]) <{
   // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>
   // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1>
   %0 = "stablehlo.broadcast_in_dim"(%arg0) {
@@ -944,8 +1010,9 @@
 }
 
 // CHECK-LABEL: "op_broadcast"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_broadcast(%arg0: tensor<16xf32>) -> tensor<16x16xf32> {
-  // CHECK: "vhlo.broadcast_v1"(%arg0) <{
+  // CHECK: "vhlo.broadcast_v1"(%[[ARG0]]) <{
   // CHECK-SAME: broadcast_sizes = #vhlo.tensor_v1 : tensor<1xi64>>
   // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1>
   %0 = "stablehlo.broadcast"(%arg0) {
@@ -955,9 +1022,10 @@
 }
 
 // CHECK-LABEL: "op_case"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @op_case(%arg0: tensor, %arg1: tensor) -> tensor {
-  // CHECK: "vhlo.case_v1"(%arg0) ({
-  // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> ()
+  // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({
+  // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> ()
   // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
   %0 = "stablehlo.case"(%arg0) ({
     "stablehlo.return"(%arg1) : (tensor) -> ()
@@ -966,22 +1034,25 @@
 }
 
 // CHECK-LABEL: "op_cbrt"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_cbrt(%arg0: tensor) -> tensor {
-  // CHECK: "vhlo.cbrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.cbrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
   %0 = "stablehlo.cbrt"(%arg0) : (tensor) -> tensor
   func.return %0 : tensor
 }
 
 // CHECK-LABEL: "op_ceil"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_ceil(%arg0: tensor) -> tensor {
-  // CHECK: "vhlo.ceil_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.ceil_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
   %0 = "stablehlo.ceil"(%arg0) : (tensor) -> tensor
   func.return %0 : tensor
 }
 
 // CHECK-LABEL: "op_cholesky"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> {
-  // CHECK: "vhlo.cholesky_v1"(%arg0) <{
+  // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{
   // CHECK-SAME: lower = #vhlo.bool_v1
   // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>
   %0 = "stablehlo.cholesky"(%arg0) {
@@ -991,22 +1062,25 @@
 }
 
 // CHECK-LABEL: "op_clamp"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
 func.func @op_clamp(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor {
-  // CHECK: "vhlo.clamp_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.clamp_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
   %0 = "stablehlo.clamp"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor
   func.return %0 : tensor
 }
 
 // CHECK-LABEL: "op_count_leading_zeros"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_count_leading_zeros(%arg0: tensor) -> tensor {
-  // CHECK: "vhlo.count_leading_zeros_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.count_leading_zeros_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
   %0 = "stablehlo.count_leading_zeros"(%arg0) : (tensor) -> tensor
   func.return %0 : tensor
 }
 
 // CHECK-LABEL: "op_collective_permute"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> {
-  // CHECK: "vhlo.collective_permute_v1"(%arg0) <{
+  // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{
   // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>,
   // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>>
   // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1>
@@ -1018,8 +1092,9 @@
 }
 
 // CHECK-LABEL: "op_compare"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @op_compare(%arg0: tensor, %arg1: tensor) -> tensor {
-  // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{
+  // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{
   // CHECK-SAME: compare_type = #vhlo,
   // CHECK-SAME: comparison_direction = #vhlo
   // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
@@ -1031,15 +1106,17 @@
 }
 
 // CHECK-LABEL: "op_complex"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @op_complex(%arg0: tensor, %arg1: tensor) -> tensor> {
-  // CHECK: "vhlo.complex_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1>
+  // CHECK: "vhlo.complex_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1>
   %0 = "stablehlo.complex"(%arg0, %arg1) : (tensor, tensor) -> tensor>
   func.return %0 : tensor>
 }
 
 // CHECK-LABEL: "op_concatenate"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @op_concatenate(%arg0: tensor<8xf32>, %arg1: tensor<8xf32>) -> tensor<16xf32> {
-  // CHECK: "vhlo.concatenate_v1"(%arg0, %arg1) <{
+  // CHECK: "vhlo.concatenate_v1"(%[[ARG0]], %[[ARG1]]) <{
   // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64>
   // CHECK-SAME: }> : (!vhlo.tensor_v1<8x!vhlo.f32_v1>, !vhlo.tensor_v1<8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
   %0 = "stablehlo.concatenate"(%arg0, %arg1) {
@@ -1049,6 +1126,7 @@
 }
 
 // CHECK-LABEL: "op_constant"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_constant(%arg0: tensor) -> tensor {
   // CHECK: "vhlo.constant_v1"() <{
   // CHECK-SAME: value = #vhlo.tensor_v1 : tensor>
@@ -1060,15 +1138,17 @@
 }
 
 // CHECK-LABEL: "op_convert"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_convert(%arg0: tensor) -> tensor {
-  // CHECK: "vhlo.convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
   %0 = "stablehlo.convert"(%arg0) : (tensor) -> tensor
   func.return %0 : tensor
 }
 
 // CHECK-LABEL: "op_convolution"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @op_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x7x7x16xf32> {
-  // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{
+  // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{
   // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>,
   // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>,
   // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>,
@@ -1102,8 +1182,9 @@
 }
 
 // CHECK-LABEL: "op_cosine"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_cosine(%arg0: tensor) -> tensor {
-  // CHECK: "vhlo.cosine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.cosine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
   %0 = "stablehlo.cosine"(%arg0) : (tensor) -> tensor
   func.return %0 : tensor
 }
@@ -1116,8 +1197,9 @@
 }
 
 // CHECK-LABEL: "op_cross_replica_sum"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_cross_replica_sum(%arg0: tensor) -> tensor {
-  // CHECK: "vhlo.cross-replica-sum_v1"(%arg0) <{
+  // CHECK: "vhlo.cross-replica-sum_v1"(%[[ARG0]]) <{
   // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>
   // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
   %0 = "stablehlo.cross-replica-sum"(%arg0) {
@@ -1127,8 +1209,9 @@
 }
 
 // CHECK-LABEL: "op_custom_call"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_custom_call(%arg0: tensor) -> tensor {
-  // CHECK: "vhlo.custom_call_v1"(%arg0) <{
+  // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{
   // CHECK-SAME: api_version = #vhlo,
   // CHECK-SAME: backend_config = #vhlo.string_v1<"\08\03\1A\02">,
   // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">,
@@ -1159,15 +1242,17 @@
 }
 
 // CHECK-LABEL: "op_divide"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @op_divide(%arg0: tensor, %arg1: tensor) -> tensor {
-  // CHECK: "vhlo.divide_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.divide_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
   %0 = "stablehlo.divide"(%arg0, %arg1) : (tensor, tensor) -> tensor
   func.return %0 : tensor
 }
 
 // CHECK-LABEL: "op_dot_general"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @op_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> {
-  // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{
+  // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{
   // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>,
   // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>,
   // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>,
@@ -1187,8 +1272,9 @@
 }
 
 // CHECK-LABEL: "op_dot"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @op_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
-  // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{
+  // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{
   // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>
   // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1>
   %0 = "stablehlo.dot"(%arg0, %arg1) {
@@ -1198,8 +1284,9 @@
 }
 
 // CHECK-LABEL: "op_dynamic_broadcast_in_dim"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @op_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor {
-  // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{
+  // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{
   // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>,
   // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>,
   // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>
@@ -1213,8 +1300,9 @@
 }
 
 // CHECK-LABEL: "op_dynamic_conv"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
 func.func @op_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> {
-  // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{
+  // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
   // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>,
   // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>,
   // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>,
@@ -1248,8 +1336,9 @@
 }
 
 // CHECK-LABEL: "op_dynamic_gather"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
 func.func @op_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> {
-  // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{
+  // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
   // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
   // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>,
   // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
@@ -1269,8 +1358,9 @@
 }
 
 // CHECK-LABEL: "op_dynamic_iota"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_dynamic_iota(%arg0: tensor<1xindex>) -> tensor {
-  // CHECK: "vhlo.dynamic_iota_v1"(%arg0) <{
+  // CHECK: "vhlo.dynamic_iota_v1"(%[[ARG0]]) <{
   // CHECK-SAME: iota_dimension = #vhlo.integer_v1<0 : i64>
   // CHECK-SAME: }> : (!vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1
   %0 = "stablehlo.dynamic_iota"(%arg0) {
@@ -1280,22 +1370,25 @@
 }
 
 // CHECK-LABEL: "op_dynamic_pad"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}})
 func.func @op_dynamic_pad(%arg0: tensor, %arg1: tensor, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>, %arg4: tensor<1xindex>) -> tensor {
-  // CHECK: "vhlo.dynamic_pad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.dynamic_pad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1
   %0 = "stablehlo.dynamic_pad"(%arg0, %arg1, %arg2, %arg3, %arg4) : (tensor, tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor
   func.return %0 : tensor
 }
 
 // CHECK-LABEL: "op_dynamic_reshape"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @op_dynamic_reshape(%arg0: tensor<16xf32>, %arg1: tensor<2xindex>) -> tensor {
-  // CHECK: "vhlo.dynamic_reshape_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.dynamic_reshape_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1
   %0 = "stablehlo.dynamic_reshape"(%arg0, %arg1) : (tensor<16xf32>, tensor<2xindex>) -> tensor
   func.return %0 : tensor
 }
 
 // CHECK-LABEL: "op_dynamic_slice"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @op_dynamic_slice(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor<4xf32> {
-  // CHECK: "vhlo.dynamic_slice_v1"(%arg0, %arg1) <{
+  // CHECK: "vhlo.dynamic_slice_v1"(%[[ARG0]], %[[ARG1]]) <{
   // CHECK-SAME: slice_sizes = #vhlo.tensor_v1 : tensor<1xi64>>
   // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<4x!vhlo.f32_v1>
   %0 = "stablehlo.dynamic_slice"(%arg0, %arg1) {
@@ -1305,15 +1398,17 @@
 }
 
 // CHECK-LABEL: "op_dynamic_update_slice"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
 func.func @op_dynamic_update_slice(%arg0: tensor<16xf32>, %arg1: tensor<4xf32>, %arg2: tensor) -> tensor<16xf32> {
-  // CHECK: "vhlo.dynamic_update_slice_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
+  // CHECK: "vhlo.dynamic_update_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
   %0 = "stablehlo.dynamic_update_slice"(%arg0, %arg1, %arg2) : (tensor<16xf32>, tensor<4xf32>, tensor) -> tensor<16xf32>
   func.return %0 : tensor<16xf32>
 }
 
 // CHECK-LABEL: "op_einsum"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @op_einsum(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> {
-  // CHECK: "vhlo.einsum_v1"(%arg0, %arg1) <{
+  // CHECK: "vhlo.einsum_v1"(%[[ARG0]], %[[ARG1]]) <{
   // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab,bc->ac">
   // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1>
   %0 = "stablehlo.einsum"(%arg0, %arg1) {
@@ -1323,22 +1418,25 @@
 }
 
 // CHECK-LABEL: "op_exponential_minus_one"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_exponential_minus_one(%arg0: tensor) -> tensor {
-  // CHECK: "vhlo.exponential_minus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.exponential_minus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
   %0 = "stablehlo.exponential_minus_one"(%arg0) : (tensor) -> tensor
   func.return %0 : tensor
 }
 
 // CHECK-LABEL: "op_exponential"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_exponential(%arg0: tensor) -> tensor {
-  // CHECK: "vhlo.exponential_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.exponential_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
   %0 = "stablehlo.exponential"(%arg0) : (tensor) -> tensor
   func.return %0 : tensor
 }
 
 // CHECK-LABEL: "op_fft"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> {
-  // CHECK: "vhlo.fft_v1"(%arg0) <{
+  // CHECK: "vhlo.fft_v1"(%[[ARG0]]) <{
   // CHECK-SAME: fft_length = #vhlo.tensor_v1 : tensor<1xi64>>,
   // CHECK-SAME: fft_type = #vhlo
   // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.complex_v1>) -> !vhlo.tensor_v1<16x!vhlo.complex_v1>
@@ -1350,8 +1448,9 @@
 }
 
 // CHECK-LABEL: "op_floor"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_floor(%arg0: tensor) -> tensor {
-  // CHECK: "vhlo.floor_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.floor_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
   %0 = "stablehlo.floor"(%arg0) : (tensor) -> tensor
   func.return %0 : tensor
 }
@@ -1364,16 +1463,17 @@
   // CHECK-SAME: sym_name = #vhlo.string_v1<"op_func">,
   // CHECK-SAME: sym_visibility = #vhlo.string_v1<"private">
   // CHECK-SAME: }> ({
-  // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1):
-  // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> ()
+  // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1):
+  // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> ()
   // CHECK-NEXT: }) : () -> ()
 
   func.return %arg0 : tensor
 }
 
 // CHECK-LABEL: "op_gather"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @op_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> {
-  // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{
+  // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{
   // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
   // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>,
   // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
@@ -1395,8 +1495,9 @@
 }
 
 // CHECK-LABEL: "op_get_dimension_size"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_get_dimension_size(%arg0: tensor) -> tensor {
-  // CHECK: "vhlo.get_dimension_size_v1"(%arg0) <{
+  // CHECK: "vhlo.get_dimension_size_v1"(%[[ARG0]]) <{
   // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64>
   // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
   %0 = "stablehlo.get_dimension_size"(%arg0) {
@@ -1406,8 +1507,9 @@
 }
 
 // CHECK-LABEL: "op_get_tuple_element"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_get_tuple_element(%arg0: tuple, tensor>) -> tensor {
-  // CHECK: "vhlo.get_tuple_element_v1"(%arg0) <{
+  // CHECK: "vhlo.get_tuple_element_v1"(%[[ARG0]]) <{
   // CHECK-SAME: index = #vhlo.integer_v1<0 : i32>
   // CHECK-SAME: }> : (!vhlo.tuple_v1, !vhlo.tensor_v1>) -> !vhlo.tensor_v1
   %0 = "stablehlo.get_tuple_element"(%arg0) {
@@ -1417,11 +1519,12 @@
 }
 
 // CHECK-LABEL: "op_if"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
 func.func @op_if(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor {
-  // CHECK: "vhlo.if_v1"(%arg0) ({
-  // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> ()
+  // CHECK: "vhlo.if_v1"(%[[ARG0]]) ({
+  // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> ()
   // CHECK-NEXT: }, {
-  // CHECK-NEXT: "vhlo.return_v1"(%arg2) : (!vhlo.tensor_v1) -> ()
+  // CHECK-NEXT: "vhlo.return_v1"(%[[ARG2]]) : (!vhlo.tensor_v1) -> ()
   // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
   %0 = "stablehlo.if"(%arg0) ({
     "stablehlo.return"(%arg1) : (tensor) -> ()
@@ -1432,15 +1535,17 @@
 }
 
 // CHECK-LABEL: "op_imag"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_imag(%arg0: tensor>) -> tensor {
-  // CHECK: "vhlo.imag_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.imag_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
   %0 = "stablehlo.imag"(%arg0) : (tensor>) -> tensor
   func.return %0 : tensor
 }
 
 // CHECK-LABEL: "op_infeed"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) {
-  // CHECK: "vhlo.infeed_v1"(%arg0) <{
+  // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{
   // CHECK-SAME: infeed_config = #vhlo.string_v1<"foo">,
   // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[#vhlo.array_v1<[]>]>
   // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1)
@@ -1463,36 +1568,41 @@
 }
 
 // CHECK-LABEL: "op_is_finite"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_is_finite(%arg0: tensor) -> tensor {
-  // CHECK: "vhlo.is_finite_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.is_finite_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
   %0 = "stablehlo.is_finite"(%arg0) : (tensor) -> tensor
   func.return %0 : tensor
 }
 
 // CHECK-LABEL: "op_log"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_log(%arg0: tensor) -> tensor {
-  // CHECK: "vhlo.log_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.log_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
   %0 = "stablehlo.log"(%arg0) : (tensor) -> tensor
   func.return %0 : tensor
 }
 
 // CHECK-LABEL: "op_log_plus_one"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_log_plus_one(%arg0: tensor) -> tensor {
-  // CHECK: "vhlo.log_plus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.log_plus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
   %0 = "stablehlo.log_plus_one"(%arg0) : (tensor) -> tensor
   func.return %0 : tensor
 }
 
 // CHECK-LABEL: "op_logistic"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_logistic(%arg0: tensor) -> tensor {
-  // CHECK: "vhlo.logistic_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.logistic_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
   %0 = "stablehlo.logistic"(%arg0) : (tensor) -> tensor
   func.return %0 : tensor
 }
 
 // CHECK-LABEL: "op_map"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_map(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-  // CHECK: "vhlo.map_v1"(%arg0) <{
+  // CHECK: "vhlo.map_v1"(%[[ARG0]]) <{
   // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>>
   // CHECK-SAME: }> ({
   // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1):
@@ -1510,57 +1620,65 @@
 }
 
 // CHECK-LABEL: "op_maximum"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @op_maximum(%arg0: tensor, %arg1: tensor) -> tensor {
-  // CHECK: "vhlo.maximum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.maximum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
   %0 = "stablehlo.maximum"(%arg0, %arg1) : (tensor, tensor) -> tensor
   func.return %0 : tensor
 }
 
 // CHECK-LABEL: "op_minimum"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @op_minimum(%arg0: tensor, %arg1: tensor) -> tensor {
-  // CHECK: "vhlo.minimum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.minimum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
  %0 = "stablehlo.minimum"(%arg0, %arg1) : (tensor, tensor) -> tensor
   func.return %0 : tensor
 }
 
 // CHECK-LABEL: "op_multiply"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @op_multiply(%arg0: tensor, %arg1: tensor) -> tensor {
-  // CHECK: "vhlo.multiply_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.multiply_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
   %0 = "stablehlo.multiply"(%arg0, %arg1) : (tensor, tensor) -> tensor
   func.return %0 : tensor
 }
 
 // CHECK-LABEL: "op_negate"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_negate(%arg0: tensor) -> tensor {
-  // CHECK: "vhlo.negate_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.negate_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
   %0 = "stablehlo.negate"(%arg0) : (tensor) -> tensor
   func.return %0 : tensor
 }
 
 // CHECK-LABEL: "op_not"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_not(%arg0: tensor) -> tensor {
-  // CHECK: "vhlo.not_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.not_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
   %0 = "stablehlo.not"(%arg0) : (tensor) -> tensor
   func.return %0 : tensor
 }
 
 // CHECK-LABEL: "op_optimization_barrier"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_optimization_barrier(%arg0: tensor) -> tensor {
-  // CHECK: "vhlo.optimization_barrier_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.optimization_barrier_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
   %0 = "stablehlo.optimization_barrier"(%arg0) : (tensor) -> tensor
   func.return %0 : tensor
 }
 
 // CHECK-LABEL: "op_or"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @op_or(%arg0: tensor, %arg1: tensor) -> tensor {
-  // CHECK: "vhlo.or_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.or_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
   %0 = "stablehlo.or"(%arg0, %arg1) : (tensor, tensor) -> tensor
   func.return %0 : tensor
 }
 
 // CHECK-LABEL: "op_outfeed"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @op_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token {
-  // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{
+  // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{
   // CHECK-SAME: outfeed_config = #vhlo.string_v1<"foo">
   // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1
   %0 = "stablehlo.outfeed"(%arg0, %arg1) {
@@ -1570,8 +1688,9 @@
 }
 
 // CHECK-LABEL: "op_pad"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @op_pad(%arg0: tensor<8xf32>, %arg1: tensor) -> tensor<16xf32> {
-  // CHECK: "vhlo.pad_v1"(%arg0, %arg1) <{
+  // CHECK: "vhlo.pad_v1"(%[[ARG0]], %[[ARG1]]) <{
   // CHECK-SAME: edge_padding_high = #vhlo.tensor_v1 : tensor<1xi64>>,
   // CHECK-SAME: edge_padding_low = #vhlo.tensor_v1 : tensor<1xi64>>,
   // CHECK-SAME: interior_padding = #vhlo.tensor_v1 : tensor<1xi64>>
@@ -1585,36 +1704,41 @@
 }
 
 // CHECK-LABEL: "op_popcnt"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_popcnt(%arg0: tensor) -> tensor {
-  // CHECK: "vhlo.popcnt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.popcnt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
   %0 = "stablehlo.popcnt"(%arg0) : (tensor) -> tensor
   func.return %0 : tensor
 }
 
 // CHECK-LABEL: "op_power"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @op_power(%arg0: tensor, %arg1: tensor) -> tensor {
-  // CHECK: "vhlo.power_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.power_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
   %0 = "stablehlo.power"(%arg0, %arg1) : (tensor, tensor) -> tensor
   func.return %0 : tensor
 }
 
 // CHECK-LABEL: "op_real_dynamic_slice"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}})
 func.func @op_real_dynamic_slice(%arg0: tensor, %arg1: tensor<1xindex>, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>) -> tensor {
-  // CHECK: "vhlo.real_dynamic_slice_v1"(%arg0, %arg1, %arg2, %arg3) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.real_dynamic_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1
   %0 = "stablehlo.real_dynamic_slice"(%arg0, %arg1, %arg2, %arg3) : (tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor
   func.return %0 : tensor
 }
 
 // CHECK-LABEL: "op_real"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_real(%arg0: tensor>) -> tensor {
-  // CHECK: "vhlo.real_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.real_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1
   %0 = "stablehlo.real"(%arg0) : (tensor>) -> tensor
   func.return %0 : tensor
 }
 
 // CHECK-LABEL: "op_recv"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) {
-  // CHECK: "vhlo.recv_v1"(%arg0) <{
+  // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{
   // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
   // CHECK-SAME: channel_type = #vhlo.integer_v1<3 : i64>,
   // CHECK-SAME: is_host_transfer = #vhlo.bool_v1
@@ -1627,8 +1751,9 @@
 }
 
 // CHECK-LABEL: "op_reduce"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @op_reduce(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor {
-  // CHECK: "vhlo.reduce_v1"(%arg0, %arg1)
+  // CHECK: "vhlo.reduce_v1"(%[[ARG0]], %[[ARG1]])
   // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1):
   // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> ()
   // CHECK: }) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1
@@ -1643,8 +1768,9 @@
 }
 
 // CHECK-LABEL: "op_reduce_precision"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_reduce_precision(%arg0: tensor) -> tensor {
-  // CHECK: "vhlo.reduce_precision_v1"(%arg0) <{
+  // CHECK: "vhlo.reduce_precision_v1"(%[[ARG0]]) <{
   // CHECK-SAME: exponent_bits = #vhlo.integer_v1<8 : i32>
   // CHECK-SAME: mantissa_bits = #vhlo.integer_v1<10 : i32>
   // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
@@ -1658,7 +1784,7 @@
 // CHECK_lABEL: "op_reduce_with_promotable_types"
 func.func @op_reduce_with_promotable_types(%arg0: tensor<4x4xf32>, %arg1 : tensor)
   -> (tensor<4xf64>) {
-  // CHECK: "vhlo.reduce_v1"(%arg0, %arg1)
+  // CHECK: "vhlo.reduce_v1"(%[[ARG0:.*]], %[[ARG1:.*]])
   // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1):
   // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> ()
   // CHECK: }) : (!vhlo.tensor_v1<4x4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<4x!vhlo.f64_v1>
@@ -1673,8 +1799,9 @@
 }
 
 // CHECK-LABEL: "op_reduce_scatter"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-  // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{
+  // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{
  // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>,
   // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>,
   // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64>
@@ -1699,7 +1826,7 @@

 // CHECK_lABEL: "op_reduce_scatter_with_promotable_types"
 func.func @op_reduce_scatter_with_promotable_types(%data: tensor<4x16xf32>) -> tensor<4x4xf64> {
-  // CHECK: "vhlo.reduce_scatter_v1"(%arg0)
+  // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0:.*]])
   // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1):
   // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> ()
   // CHECK: }) : (!vhlo.tensor_v1<4x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f64_v1>
@@ -1716,8 +1843,9 @@
 
 
 // CHECK-LABEL: "op_reduce_window"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @op_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x9x16x7xf32> {
-  // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{
+  // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{
   // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>,
   // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>,
   // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>,
@@ -1746,7 +1874,7 @@
 func.func @op_reduce_window_with_promotable_types(%arg0: tensor<4x2xf32>,
     %arg1: tensor<4x2xf32>, %init0: tensor, %init1: tensor) ->
     (tensor<2x2xf64>, tensor<2x2xf32>) {
-  // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1, %arg2, %arg3)
+  // CHECK: "vhlo.reduce_window_v1"(%[[ARG0:.*]], %[[ARG1:.*]], %[[ARG2:.*]], %[[ARG3:.*]])
   // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1, %[[ARG3:arg.*]]: !vhlo.tensor_v1, %[[ARG4:arg.*]]: !vhlo.tensor_v1):
   // CHECK: "vhlo.return_v1"(%[[VAL1:.*]], %[[VAL2:.*]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> ()
   // CHECK: }) : (!vhlo.tensor_v1<4x2x!vhlo.f32_v1>, !vhlo.tensor_v1<4x2x!vhlo.f32_v1>, !vhlo.tensor_v1, !vhlo.tensor_v1) -> (!vhlo.tensor_v1<2x2x!vhlo.f64_v1>, !vhlo.tensor_v1<2x2x!vhlo.f32_v1>)
@@ -1766,8 +1894,9 @@
 }
 
 // CHECK-LABEL: "op_remainder"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @op_remainder(%arg0: tensor, %arg1: tensor) -> tensor {
-  // CHECK: "vhlo.remainder_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.remainder_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
   %0 = "stablehlo.remainder"(%arg0, %arg1) : (tensor, tensor) -> tensor
   func.return %0 : tensor
 }
@@ -1787,16 +1916,18 @@
 }
 
 // CHECK-LABEL: "op_reshape"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_reshape(%arg0: tensor<16xf32>) -> tensor<4x4xf32> {
-  // CHECK: "vhlo.reshape_v1"(%arg0) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1>
+  // CHECK: "vhlo.reshape_v1"(%[[ARG0]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1>
   %0 = "stablehlo.reshape"(%arg0) : (tensor<16xf32>) -> tensor<4x4xf32>
   func.return %0 : tensor<4x4xf32>
 }
 
 // CHECK-LABEL: "op_return"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @op_return(%arg0: tensor, %arg1: tensor) -> tensor {
-  // CHECK: "vhlo.case_v1"(%arg0) ({
-  // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> ()
+  // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({
+  // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> ()
   // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
   %0 = "stablehlo.case"(%arg0) ({
     "stablehlo.return"(%arg1) : (tensor) -> ()
@@ -1805,8 +1936,9 @@
 }
 
 // CHECK-LABEL: "op_reverse"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_reverse(%arg0: tensor<16xf32>) -> tensor<16xf32> {
-  // CHECK: "vhlo.reverse_v1"(%arg0) <{
+  // CHECK: "vhlo.reverse_v1"(%[[ARG0]]) <{
   // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>>
   // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
   %0 = "stablehlo.reverse"(%arg0) {
@@ -1816,8 +1948,9 @@
 }
 
 // CHECK-LABEL: "op_rng_bit_generator"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_rng_bit_generator(%arg0: tensor) -> (tensor, tensor) {
-  // CHECK: "vhlo.rng_bit_generator_v1"(%arg0) <{
+  // CHECK: "vhlo.rng_bit_generator_v1"(%[[ARG0]]) <{
   // CHECK-SAME: rng_algorithm = #vhlo
   // CHECK-SAME: }> : (!vhlo.tensor_v1) -> (!vhlo.tensor_v1, !vhlo.tensor_v1)
   %0:2 = "stablehlo.rng_bit_generator"(%arg0) {
@@ -1827,8 +1960,9 @@
 }
 
 // CHECK-LABEL: "op_rng"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
 func.func @op_rng(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor {
-  // CHECK: "vhlo.rng_v1"(%arg0, %arg1, %arg2) <{
+  // CHECK: "vhlo.rng_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
   // CHECK-SAME: rng_distribution = #vhlo
   // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<0x!vhlo.index_v1>) -> !vhlo.tensor_v1
   %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) {
@@ -1838,29 +1972,33 @@
 }
 
 // CHECK-LABEL: "op_round_nearest_afz"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_round_nearest_afz(%arg0: tensor) -> tensor {
-  // CHECK: "vhlo.round_nearest_afz_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.round_nearest_afz_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
   %0 = "stablehlo.round_nearest_afz"(%arg0) : (tensor) -> tensor
   func.return %0 : tensor
 }
 
 // CHECK-LABEL: "op_round_nearest_even"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_round_nearest_even(%arg0: tensor) -> tensor {
-  // CHECK: "vhlo.round_nearest_even_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.round_nearest_even_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
   %0 = "stablehlo.round_nearest_even"(%arg0) : (tensor) -> tensor
   func.return %0 : tensor
 }
 
 // CHECK-LABEL: "op_rsqrt"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
 func.func @op_rsqrt(%arg0: tensor) -> tensor {
-  // CHECK: "vhlo.rsqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.rsqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1
   %0 = "stablehlo.rsqrt"(%arg0) : (tensor) -> tensor
   func.return %0 : tensor
 }
 
 // CHECK-LABEL: "op_scatter"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
 func.func @op_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> {
-  // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{
+  // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
   // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>,
   // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1,
   // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>,
@@ -1893,7 +2031,7 @@
 func.func @op_scatter_with_promotable_types(%input_tensor: tensor<200x100x300xf32>,
     %scatter_indices: tensor<10x2xi32>, %updates: tensor<10x300xf32>) ->
     tensor<200x100x300xf64> {
-  // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2)
+  // CHECK: "vhlo.scatter_v1"(%[[ARG0:.*]], %[[ARG1:.*]], %[[ARG2:.*]])
   // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1):
   // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> ()
   // CHECK: }) : (!vhlo.tensor_v1<200x100x300x!vhlo.f32_v1>, !vhlo.tensor_v1<10x2x!vhlo.i32_v1>, !vhlo.tensor_v1<10x300x!vhlo.f32_v1>) -> !vhlo.tensor_v1<200x100x300x!vhlo.f64_v1>
@@ -1916,8 +2054,9 @@
 }
 
 // CHECK-LABEL: "op_select_and_scatter"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
 func.func @op_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<12x13x13x66xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> {
-  // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{
+  // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{
   // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>,
   // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>,
   // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>>
@@ -1947,8 +2086,9 @@
 }
 
 // CHECK-LABEL: "op_select_and_scatter_with_promotable_types"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
 func.func @op_select_and_scatter_with_promotable_types(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<12x13x13x66xf32>, %arg2: tensor) -> tensor<10x24x24x64xf64> {
-  // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2)
+  // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]])
   // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1):
   // CHECK: %[[VAL:.*]] = "vhlo.add_v1"(%[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
   // CHECK: "vhlo.return_v1"(%[[VAL]]) : (!vhlo.tensor_v1) -> ()
@@ -1970,15 +2110,17 @@
 }
 
 // CHECK-LABEL: "op_select"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}})
 func.func @op_select(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor {
-  // CHECK: "vhlo.select_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.select_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
   %0 = "stablehlo.select"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor
   func.return %0 : tensor
 }
 
 // CHECK-LABEL: "op_send"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @op_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token {
-  // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{
+  // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{
   // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>,
   // CHECK-SAME: channel_type = #vhlo.integer_v1<2 : i64>,
   // CHECK-SAME: is_host_transfer = #vhlo.bool_v1
@@ -1991,8 +2133,9 @@
 }
 
 // CHECK-LABEL: "op_set_dimension_size"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @op_set_dimension_size(%arg0: tensor, %arg1: tensor) -> tensor<16xf32> {
-  // CHECK: "vhlo.set_dimension_size_v1"(%arg0, %arg1) <{
+  // CHECK: "vhlo.set_dimension_size_v1"(%[[ARG0]], %[[ARG1]]) <{
   // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64>
   // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1>
   %0 = "stablehlo.set_dimension_size"(%arg0, %arg1) {
@@ -2002,43 +2145,49 @@
 }
 
 // CHECK-LABEL: "op_shift_left"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @op_shift_left(%arg0: tensor, %arg1: tensor) -> tensor {
-  // CHECK: "vhlo.shift_left_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.shift_left_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
   %0 = "stablehlo.shift_left"(%arg0, %arg1) : (tensor, tensor) -> tensor
   func.return %0 : tensor
 }
 
 // CHECK-LABEL: "op_shift_right_arithmetic"
+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
 func.func @op_shift_right_arithmetic(%arg0: tensor, %arg1: tensor) -> tensor {
-  // CHECK: "vhlo.shift_right_arithmetic_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1
+  // CHECK: "vhlo.shift_right_arithmetic_v1"(%[[ARG0]],
%[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_right_arithmetic"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_shift_right_logical" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_right_logical(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.shift_right_logical_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_right_logical_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_right_logical"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_sign" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sign(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.sign_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.sign_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.sign"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_sine" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sine(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.sine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.sine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.sine"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_slice(%arg0: tensor<16xf32>) -> tensor<4xf32> { -- // CHECK: "vhlo.slice_v1"(%arg0) <{ -+ // CHECK: "vhlo.slice_v1"(%[[ARG0]]) <{ - // CHECK-SAME: limit_indices = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: start_indices = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: strides = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -2052,8 +2201,9 @@ - } - - // CHECK-LABEL: "op_sort" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.sort_v1"(%arg0) <{ -+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: is_stable = #vhlo.bool_v1 - // CHECK-SAME: }> ({ -@@ -2073,29 +2223,33 @@ - } - - // CHECK-LABEL: "op_sqrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sqrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.sqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.sqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.sqrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_subtract" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_subtract(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.subtract_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.subtract_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.subtract"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_tanh" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_tanh(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.tanh_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.tanh_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.tanh"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_torch_index_select" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_torch_index_select(%arg0: 
tensor<5x1x5xf32>, %arg1: tensor<2xi32>) -> tensor<2x1x5xf32> { -- // CHECK: "vhlo.torch_index_select_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.torch_index_select_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_dims = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: dim = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<5x1x5x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.i32_v1>) -> !vhlo.tensor_v1<2x1x5x!vhlo.f32_v1> -@@ -2107,8 +2261,9 @@ - } - - // CHECK-LABEL: "op_trace" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_trace(%arg0: tensor) { -- // CHECK: "vhlo.trace_v1"(%arg0) <{ -+ // CHECK: "vhlo.trace_v1"(%[[ARG0]]) <{ - // CHECK-SAME: tag = #vhlo.string_v1<"foo"> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> () - "stablehlo.trace"(%arg0) { -@@ -2118,8 +2273,9 @@ - } - - // CHECK-LABEL: "op_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_transpose(%arg0: tensor<16x8xf32>) -> tensor<8x16xf32> { -- // CHECK: "vhlo.transpose_v1"(%arg0) <{ -+ // CHECK: "vhlo.transpose_v1"(%[[ARG0]]) <{ - // CHECK-SAME: permutation = #vhlo.tensor_v1 : tensor<2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x16x!vhlo.f32_v1> - %0 = "stablehlo.transpose"(%arg0) { -@@ -2129,8 +2285,9 @@ - } - - // CHECK-LABEL: "op_triangular_solve" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_triangular_solve(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.triangular_solve_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.triangular_solve_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: left_side = #vhlo.bool_v1, - // CHECK-SAME: lower = #vhlo.bool_v1, - // CHECK-SAME: transpose_a = #vhlo, -@@ -2146,15 +2303,17 @@ - } - - // CHECK-LABEL: "op_tuple" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_tuple(%arg0: tensor) -> tuple> { -- // CHECK: "vhlo.tuple_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1> -+ // CHECK: "vhlo.tuple_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1> - %0 = "stablehlo.tuple"(%arg0) : (tensor) -> tuple> - func.return %0 : tuple> - } - - // CHECK-LABEL: "op_unary_einsum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_unary_einsum(%arg0: tensor<8x16xf32>) -> tensor<8xf32> { -- // CHECK: "vhlo.unary_einsum_v1"(%arg0) <{ -+ // CHECK: "vhlo.unary_einsum_v1"(%[[ARG0]]) <{ - // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab->a"> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x!vhlo.f32_v1> - %0 = "stablehlo.unary_einsum"(%arg0) { -@@ -2164,22 +2323,25 @@ - } - - // CHECK-LABEL: "op_uniform_dequantize" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_uniform_dequantize(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.uniform_dequantize_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.uniform_dequantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.uniform_dequantize"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_uniform_quantize" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_uniform_quantize(%arg0: tensor) -> tensor> { -- // CHECK: "vhlo.uniform_quantize_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.uniform_quantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1> - %0 = "stablehlo.uniform_quantize"(%arg0) : (tensor) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "op_while" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_while(%arg0: tensor) -> tensor { -- // CHECK: 
"vhlo.while_v1"(%arg0) ({ -+ // CHECK: "vhlo.while_v1"(%[[ARG0]]) ({ - // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1): - // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }, { -@@ -2197,8 +2359,9 @@ - } - - // CHECK-LABEL: "op_xor" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_xor(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.xor_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.xor_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.xor"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } -@@ -2206,197 +2369,225 @@ - // ============ TYPES ============ - - // CHECK-LABEL: "type_i1" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i1(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i4" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i4(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i8" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i8(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i32(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i64(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui4" -+// CHECK-NEXT: 
(%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui4(%arg0: tensor<ui4>, %arg1: tensor<ui4>) -> tensor<ui4> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.ui4_v1>, !vhlo.tensor_v1<!vhlo.ui4_v1>) -> !vhlo.tensor_v1<!vhlo.ui4_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.ui4_v1>, !vhlo.tensor_v1<!vhlo.ui4_v1>) -> !vhlo.tensor_v1<!vhlo.ui4_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<ui4>, tensor<ui4>) -> tensor<ui4>
- func.return %0 : tensor<ui4>
- }
-
- // CHECK-LABEL: "type_ui8"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui8(%arg0: tensor<ui8>, %arg1: tensor<ui8>) -> tensor<ui8> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.ui8_v1>, !vhlo.tensor_v1<!vhlo.ui8_v1>) -> !vhlo.tensor_v1<!vhlo.ui8_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.ui8_v1>, !vhlo.tensor_v1<!vhlo.ui8_v1>) -> !vhlo.tensor_v1<!vhlo.ui8_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<ui8>, tensor<ui8>) -> tensor<ui8>
- func.return %0 : tensor<ui8>
- }
-
- // CHECK-LABEL: "type_ui16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui16(%arg0: tensor<ui16>, %arg1: tensor<ui16>) -> tensor<ui16> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.ui16_v1>, !vhlo.tensor_v1<!vhlo.ui16_v1>) -> !vhlo.tensor_v1<!vhlo.ui16_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.ui16_v1>, !vhlo.tensor_v1<!vhlo.ui16_v1>) -> !vhlo.tensor_v1<!vhlo.ui16_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<ui16>, tensor<ui16>) -> tensor<ui16>
- func.return %0 : tensor<ui16>
- }
-
- // CHECK-LABEL: "type_ui32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui32(%arg0: tensor<ui32>, %arg1: tensor<ui32>) -> tensor<ui32> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.ui32_v1>, !vhlo.tensor_v1<!vhlo.ui32_v1>) -> !vhlo.tensor_v1<!vhlo.ui32_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.ui32_v1>, !vhlo.tensor_v1<!vhlo.ui32_v1>) -> !vhlo.tensor_v1<!vhlo.ui32_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<ui32>, tensor<ui32>) -> tensor<ui32>
- func.return %0 : tensor<ui32>
- }
-
- // CHECK-LABEL: "type_ui64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_ui64(%arg0: tensor<ui64>, %arg1: tensor<ui64>) -> tensor<ui64> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.ui64_v1>, !vhlo.tensor_v1<!vhlo.ui64_v1>) -> !vhlo.tensor_v1<!vhlo.ui64_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.ui64_v1>, !vhlo.tensor_v1<!vhlo.ui64_v1>) -> !vhlo.tensor_v1<!vhlo.ui64_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<ui64>, tensor<ui64>) -> tensor<ui64>
- func.return %0 : tensor<ui64>
- }
-
- // CHECK-LABEL: "type_f8E4M3FN"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E4M3FN(%arg0: tensor<f8E4M3FN>, %arg1: tensor<f8E4M3FN>) -> tensor<f8E4M3FN> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f8E4M3FN_v1>, !vhlo.tensor_v1<!vhlo.f8E4M3FN_v1>) -> !vhlo.tensor_v1<!vhlo.f8E4M3FN_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f8E4M3FN_v1>, !vhlo.tensor_v1<!vhlo.f8E4M3FN_v1>) -> !vhlo.tensor_v1<!vhlo.f8E4M3FN_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f8E4M3FN>, tensor<f8E4M3FN>) -> tensor<f8E4M3FN>
- func.return %0 : tensor<f8E4M3FN>
- }
-
- // CHECK-LABEL: "type_f8E5M2"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E5M2(%arg0: tensor<f8E5M2>, %arg1: tensor<f8E5M2>) -> tensor<f8E5M2> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f8E5M2_v1>, !vhlo.tensor_v1<!vhlo.f8E5M2_v1>) -> !vhlo.tensor_v1<!vhlo.f8E5M2_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f8E5M2_v1>, !vhlo.tensor_v1<!vhlo.f8E5M2_v1>) -> !vhlo.tensor_v1<!vhlo.f8E5M2_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f8E5M2>, tensor<f8E5M2>) -> tensor<f8E5M2>
- func.return %0 : tensor<f8E5M2>
- }
-
- // CHECK-LABEL: "type_f8E4M3FNUZ"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E4M3FNUZ(%arg0: tensor<f8E4M3FNUZ>, %arg1: tensor<f8E4M3FNUZ>) -> tensor<f8E4M3FNUZ> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f8E4M3FNUZ_v1>, !vhlo.tensor_v1<!vhlo.f8E4M3FNUZ_v1>) -> !vhlo.tensor_v1<!vhlo.f8E4M3FNUZ_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f8E4M3FNUZ_v1>, !vhlo.tensor_v1<!vhlo.f8E4M3FNUZ_v1>) -> !vhlo.tensor_v1<!vhlo.f8E4M3FNUZ_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f8E4M3FNUZ>, tensor<f8E4M3FNUZ>) -> tensor<f8E4M3FNUZ>
- func.return %0 : tensor<f8E4M3FNUZ>
- }
-
- // CHECK-LABEL: "type_f8E4M3B11FNUZ"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E4M3B11FNUZ(%arg0: tensor<f8E4M3B11FNUZ>, %arg1: tensor<f8E4M3B11FNUZ>) -> tensor<f8E4M3B11FNUZ> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f8E4M3B11FNUZ_v1>, !vhlo.tensor_v1<!vhlo.f8E4M3B11FNUZ_v1>) -> !vhlo.tensor_v1<!vhlo.f8E4M3B11FNUZ_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f8E4M3B11FNUZ_v1>, !vhlo.tensor_v1<!vhlo.f8E4M3B11FNUZ_v1>) -> !vhlo.tensor_v1<!vhlo.f8E4M3B11FNUZ_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f8E4M3B11FNUZ>, tensor<f8E4M3B11FNUZ>) -> tensor<f8E4M3B11FNUZ>
- func.return %0 : tensor<f8E4M3B11FNUZ>
- }
-
- // CHECK-LABEL: "type_f8E5M2FNUZ"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f8E5M2FNUZ(%arg0: tensor<f8E5M2FNUZ>, %arg1: tensor<f8E5M2FNUZ>) -> tensor<f8E5M2FNUZ> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f8E5M2FNUZ_v1>, !vhlo.tensor_v1<!vhlo.f8E5M2FNUZ_v1>) -> !vhlo.tensor_v1<!vhlo.f8E5M2FNUZ_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f8E5M2FNUZ_v1>, !vhlo.tensor_v1<!vhlo.f8E5M2FNUZ_v1>) -> !vhlo.tensor_v1<!vhlo.f8E5M2FNUZ_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f8E5M2FNUZ>, tensor<f8E5M2FNUZ>) -> tensor<f8E5M2FNUZ>
- func.return %0 : tensor<f8E5M2FNUZ>
- }
-
- // CHECK-LABEL: "type_bf16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_bf16(%arg0: tensor<bf16>, %arg1: tensor<bf16>) -> tensor<bf16> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.bf16_v1>, !vhlo.tensor_v1<!vhlo.bf16_v1>) -> !vhlo.tensor_v1<!vhlo.bf16_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.bf16_v1>, !vhlo.tensor_v1<!vhlo.bf16_v1>) -> !vhlo.tensor_v1<!vhlo.bf16_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<bf16>, tensor<bf16>) -> tensor<bf16>
- func.return %0 : tensor<bf16>
- }
-
- // CHECK-LABEL: "type_f16"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f16(%arg0: tensor<f16>, %arg1: tensor<f16>) -> tensor<f16> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f16_v1>, !vhlo.tensor_v1<!vhlo.f16_v1>) -> !vhlo.tensor_v1<!vhlo.f16_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f16_v1>, !vhlo.tensor_v1<!vhlo.f16_v1>) -> !vhlo.tensor_v1<!vhlo.f16_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f16>, tensor<f16>) -> tensor<f16>
- func.return %0 : tensor<f16>
- }
-
- // CHECK-LABEL: "type_f32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f32(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<f32> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f32_v1>, !vhlo.tensor_v1<!vhlo.f32_v1>) -> !vhlo.tensor_v1<!vhlo.f32_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f32_v1>, !vhlo.tensor_v1<!vhlo.f32_v1>) -> !vhlo.tensor_v1<!vhlo.f32_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
- func.return %0 : tensor<f32>
- }
-
- // CHECK-LABEL: "type_f64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_f64(%arg0: tensor<f64>, %arg1: tensor<f64>) -> tensor<f64> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.f64_v1>, !vhlo.tensor_v1<!vhlo.f64_v1>) -> !vhlo.tensor_v1<!vhlo.f64_v1>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.f64_v1>, !vhlo.tensor_v1<!vhlo.f64_v1>) -> !vhlo.tensor_v1<!vhlo.f64_v1>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<f64>, tensor<f64>) -> tensor<f64>
- func.return %0 : tensor<f64>
- }
-
- // CHECK-LABEL: "type_complex_f32"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_complex_f32(%arg0: tensor<complex<f32>>, %arg1: tensor<complex<f32>>) -> tensor<complex<f32>> {
-- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f32_v1>>, !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f32_v1>>) -> !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f32_v1>>
-+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f32_v1>>, !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f32_v1>>) -> !vhlo.tensor_v1<!vhlo.complex_v1<!vhlo.f32_v1>>
- %0 = "stablehlo.add"(%arg0, %arg1) : (tensor<complex<f32>>, tensor<complex<f32>>) -> tensor<complex<f32>>
- func.return %0 : tensor<complex<f32>>
- }
-
- // CHECK-LABEL: "type_complex_f64"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @type_complex_f64(%arg0: tensor<complex<f64>>, %arg1: tensor<complex<f64>>) -> tensor<complex<f64>> {
-- // CHECK: "vhlo.add_v1"(%arg0, 
%arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "type_dynamism_ranked" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_dynamism_ranked(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_per_tensor_quantization" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_per_tensor_quantization(%arg0: tensor>, %arg1: tensor>) -> tensor> { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "type_per_axis_quantization" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_per_axis_quantization(%arg0: tensor<2x!quant.uniform>) -> tensor<2x!quant.uniform> { -- // CHECK: "vhlo.add_v1"(%arg0, %arg0) : (!vhlo.tensor_v1<2x!vhlo.quant_per_axis_v1>, !vhlo.tensor_v1<2x!vhlo.quant_per_axis_v1>) -> !vhlo.tensor_v1<2x!vhlo.quant_per_axis_v1> -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG0]]) : (!vhlo.tensor_v1<2x!vhlo.quant_per_axis_v1>, !vhlo.tensor_v1<2x!vhlo.quant_per_axis_v1>) -> !vhlo.tensor_v1<2x!vhlo.quant_per_axis_v1> - %0 = stablehlo.add %arg0, %arg0 : tensor<2x!quant.uniform> - func.return %0 : tensor<2x!quant.uniform> - } - - // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>> - // CHECK-LABEL: "type_token_callee" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_token_callee(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.token_v1) -> () -+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> () - return %arg0 : !stablehlo.token - } - - // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>> - // CHECK-LABEL: "type_token_caller" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_token_caller(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.call_v1"(%arg0) <{callee = #vhlo.string_v1<"type_token_callee">} -+ // CHECK: "vhlo.call_v1"(%[[ARG0]]) <{callee = #vhlo.string_v1<"type_token_callee">} - // CHECK-SAME: (!vhlo.token_v1) -> !vhlo.token_v1 - %0 = func.call @type_token_callee(%arg0) : (!stablehlo.token) -> !stablehlo.token - return %0 : !stablehlo.token - } - - // CHECK-LABEL: "type_tuple" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_tuple(%arg0: tuple>) -> tuple { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo" -diff --ruN a/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_19_0.mlir b/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_19_0.mlir ---- stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_19_0.mlir -+++ stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_19_0.mlir -@@ -13,6 +13,7 @@ - // ============ ATTRIBUTES ============ - - // CHECK-LABEL: "attr_comparison_direction_eq" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_eq(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, 
%arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -22,6 +23,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_ne"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_ne(%arg0: tensor, %arg1: tensor) -> tensor<i1> {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -31,6 +33,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_ge"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_ge(%arg0: tensor, %arg1: tensor) -> tensor<i1> {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -40,6 +43,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_gt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_gt(%arg0: tensor, %arg1: tensor) -> tensor<i1> {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -49,6 +53,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_le"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_le(%arg0: tensor, %arg1: tensor) -> tensor<i1> {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -58,6 +63,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_direction_lt"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_direction_lt(%arg0: tensor, %arg1: tensor) -> tensor<i1> {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- // CHECK: comparison_direction = #vhlo
-@@ -67,6 +73,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_notype"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_notype(%arg0: tensor, %arg1: tensor) -> tensor<i1> {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo<comparison_direction EQ>
-@@ -76,6 +83,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_float"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_float(%arg0: tensor, %arg1: tensor) -> tensor<i1> {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo<comparison_direction EQ>,
-@@ -86,6 +94,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_totalorder"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_totalorder(%arg0: tensor, %arg1: tensor) -> tensor<i1> {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo<comparison_direction EQ>,
-@@ -96,6 +105,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_signed"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_signed(%arg0: tensor, %arg1: tensor) -> tensor<i1> {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo<comparison_direction EQ>,
-@@ -106,6 +116,7 @@
- }
-
- // CHECK-LABEL: "attr_comparison_type_unsigned"
-+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}})
- func.func @attr_comparison_type_unsigned(%arg0: tensor, %arg1: tensor) -> tensor<i1> {
- %0 = "stablehlo.compare"(%arg0, %arg1) {
- comparison_direction = #stablehlo<comparison_direction EQ>,
-@@ -118,6 +129,7 @@
- // ConvDimensionNumbers aka #stablehlo.conv is covered below. 
- - // CHECK-LABEL: "attr_custom_call_api_version_unspecified" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_unspecified(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -128,6 +140,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_original" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_original(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -138,6 +151,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_status_returning" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_status_returning(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -148,6 +162,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_status_returning_unified" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_status_returning_unified(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -166,6 +181,7 @@ - // DotDimensionNumbers aka #stablehlo.dot is covered below. - - // CHECK-LABEL: "attr_fft_type_fft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -176,6 +192,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_ifft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_ifft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -186,6 +203,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_rfft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_rfft(%arg0: tensor<16xf32>) -> tensor<9xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -196,6 +214,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_irfft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_irfft(%arg0: tensor<9xcomplex>) -> tensor<16xf32> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -208,6 +227,7 @@ - // GatherDimensionNumbers aka #stablehlo.gather is covered below. 
- - // CHECK-LABEL: "attr_precision_config_default" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_default(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -216,6 +236,7 @@ - } - - // CHECK-LABEL: "attr_precision_config_high" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_high(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -225,6 +246,7 @@ - } - - // CHECK-LABEL: "attr_precision_config_highest" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_highest(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -234,6 +256,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_default" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_default(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -243,6 +266,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_three_fry" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_three_fry(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -252,6 +276,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_philox" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_philox(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -261,6 +286,7 @@ - } - - // CHECK-LABEL: "attr_rng_distribution_uniform" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @attr_rng_distribution_uniform(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { - %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) { - // CHECK: rng_distribution = #vhlo -@@ -270,6 +296,7 @@ - } - - // CHECK-LABEL: "attr_rng_distribution_normal" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @attr_rng_distribution_normal(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { - %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) { - // CHECK: rng_distribution = #vhlo -@@ -281,6 +308,7 @@ - // ScatterDimensionNumbers aka #stablehlo.scatter is covered below. 
- - // CHECK-LABEL: "attr_transpose_no_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_no_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -293,6 +321,7 @@ - } - - // CHECK-LABEL: "attr_transpose_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -305,6 +334,7 @@ - } - - // CHECK-LABEL: "attr_transpose_adjoint" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_adjoint(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -319,10 +349,9 @@ - // TypeExtensionsAttr aka #stablehlo.type_extensions is covered below. - - // CHECK-LABEL: "attr_type_extensions_bounds" --func.func @attr_type_extensions_bounds( -- %arg0: tensor>) -- -> tensor> { -- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1>) -> () -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) -+func.func @attr_type_extensions_bounds(%arg0: tensor>) -> tensor> { -+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> () - func.return %arg0 : tensor> - } - -@@ -330,8 +359,9 @@ - // ============ DEFAULTS ============ - - // CHECK-LABEL: "default_all_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.all_gather_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{ - // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64> - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -345,8 +375,9 @@ - } - - // CHECK-LABEL: "default_all_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_reduce(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.all_reduce_v1"(%arg0) -+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]]) - // CHECK-SAME: <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -368,8 +399,9 @@ - } - - // CHECK-LABEL: "default_all_to_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> { -- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>, -@@ -386,8 +418,9 @@ - } - - // CHECK-LABEL: "default_cholesky" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> { -- // CHECK: "vhlo.cholesky_v1"(%arg0) <{ -+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{ - // CHECK-SAME: lower = #vhlo.bool_v1 - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1> - %0 = "stablehlo.cholesky"(%arg0) : (tensor<1x16x16xf32>) -> tensor<1x16x16xf32> -@@ -395,8 +428,9 @@ - } - - // CHECK-LABEL: "default_collective_permute" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: 
"vhlo.collective_permute_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -407,8 +441,9 @@ - } - - // CHECK-LABEL: "default_collective_broadcast" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_collective_broadcast(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: "vhlo.collective_broadcast_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_broadcast_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -419,8 +454,9 @@ - } - - // CHECK-LABEL: "default_compare" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_compare(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: compare_type = #vhlo, - // CHECK-SAME: comparison_direction = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -431,8 +467,9 @@ - } - - // CHECK-LABEL: "default_composite" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_composite(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.composite_v1"(%arg0) <{ -+ // CHECK: "vhlo.composite_v1"(%[[ARG0]]) <{ - // CHECK-SAME: composite_attributes = #vhlo.dict_v1<{}> - // CHECK-SAME: decomposition = #vhlo.string_v1<"composite_target"> - // CHECK-SAME: name = #vhlo.string_v1<"stablehlo.composite_target"> -@@ -446,8 +483,9 @@ - } - - // CHECK-LABEL: "default_convolution" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x6x6x16xf32> { -- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -475,8 +513,9 @@ - } - - // CHECK-LABEL: "default_custom_call" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_custom_call(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.custom_call_v1"(%arg0) <{ -+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{ - // CHECK-SAME: api_version = #vhlo, - // CHECK-SAME: backend_config = #vhlo.string_v1<"">, - // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">, -@@ -493,8 +532,9 @@ - } - - // CHECK-LABEL: "default_dot_general" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> { -- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, -@@ -513,8 +553,9 @@ - } - - // CHECK-LABEL: "default_dot" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> 
tensor<8x8xf32> { -- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.dot"(%arg0, %arg1) : (tensor<8x16xf32>, tensor<16x8xf32>) -> tensor<8x8xf32> -@@ -522,8 +563,9 @@ - } - - // CHECK-LABEL: "default_dynamic_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>>, - // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>> -@@ -535,8 +577,9 @@ - } - - // CHECK-LABEL: "default_dynamic_conv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> { -- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -564,8 +607,9 @@ - } - - // CHECK-LABEL: "default_dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> { -- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -591,15 +635,16 @@ - // CHECK-SAME: sym_name = #vhlo.string_v1<"default_func">, - // CHECK-SAME: sym_visibility = #vhlo.string_v1<""> - // CHECK-SAME: }> ({ -- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1): -- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1): -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : () -> () - func.return %arg0 : tensor - } - - // CHECK-LABEL: "dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> { -- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -620,8 +665,9 @@ - } - - // CHECK-LABEL: "default_infeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.infeed_v1"(%arg0) <{ -+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{ - // CHECK-SAME: infeed_config = 
#vhlo.string_v1<"">, - // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[]> - // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) -@@ -630,8 +676,9 @@ - } - - // CHECK-LABEL: "default_outfeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: outfeed_config = #vhlo.string_v1<""> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.outfeed"(%arg0, %arg1) : (tensor, !stablehlo.token) -> !stablehlo.token -@@ -639,8 +686,9 @@ - } - - // CHECK-LABEL: "default_recv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.recv_v1"(%arg0) <{ -+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -652,8 +700,9 @@ - } - - // CHECK-LABEL: "default_send" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -665,8 +714,9 @@ - } - - // CHECK-LABEL: "default_reduce_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64> -@@ -688,8 +738,9 @@ - } - - // CHECK-LABEL: "default_reduce_window" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x16x30x7xf32> { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, -@@ -711,8 +762,9 @@ - } - - // CHECK-LABEL: "default_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> { -- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, - // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>, -@@ -740,8 +792,9 @@ - } - - // CHECK-LABEL: "default_select_and_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<10x23x23x64xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> { -- 
// CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>> -@@ -769,8 +822,9 @@ - } - - // CHECK-LABEL: "default_sort" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.sort_v1"(%arg0) <{ -+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<-1 : i64> - // CHECK-SAME: is_stable = #vhlo.bool_v1 - // CHECK-SAME: }> ({ -@@ -789,29 +843,33 @@ - // ============ OPS ============ - - // CHECK-LABEL: "op_abs" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_abs(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_add" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_add(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_after_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_after_all(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.after_all_v1"(%arg0) : (!vhlo.token_v1) -> !vhlo.token_v1 -+ // CHECK: "vhlo.after_all_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.after_all"(%arg0) : (!stablehlo.token) -> !stablehlo.token - func.return %0 : !stablehlo.token - } - - // CHECK-LABEL: "op_all_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.all_gather_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{ - // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64> - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -827,8 +885,9 @@ - } - - // CHECK-LABEL: "op_all_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_reduce(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.all_reduce_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: use_global_device_ids = #vhlo.bool_v1 -@@ -851,7 +910,7 @@ - - // CHECK-LABEL: "op_all_reduce_with_promotable_types" - func.func @op_all_reduce_with_promotable_types(%operand: tensor) -> tensor { -- // CHECK: "vhlo.all_reduce_v1"(%arg0) -+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0:.*]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -869,8 +928,9 @@ - } - - // CHECK-LABEL: "op_all_to_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> { -- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{ -+ 
// CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>, -@@ -888,22 +948,25 @@ - } - - // CHECK-LABEL: "op_and" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_and(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_atan2" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_atan2(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.atan2_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.atan2_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.atan2"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_batch_norm_grad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_batch_norm_grad(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16x16x16x16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { -- // CHECK: "vhlo.batch_norm_grad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ -+ // CHECK: "vhlo.batch_norm_grad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -@@ -915,8 +978,9 @@ - } - - // CHECK-LABEL: "op_batch_norm_inference" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_batch_norm_inference(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16xf32>) -> tensor<16x16x16x16xf32> { -- // CHECK: "vhlo.batch_norm_inference_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ -+ // CHECK: "vhlo.batch_norm_inference_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1> -@@ -928,8 +992,9 @@ - } - - // CHECK-LABEL: "op_batch_norm_training" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_batch_norm_training(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { -- // CHECK: "vhlo.batch_norm_training_v1"(%arg0, %arg1, 
%arg2) <{ -+ // CHECK: "vhlo.batch_norm_training_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -@@ -941,15 +1006,17 @@ - } - - // CHECK-LABEL: "op_bitcast_convert" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_bitcast_convert(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.bitcast_convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.bitcast_convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.bitcast_convert"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_broadcast_in_dim(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.broadcast_in_dim_v1"(%arg0) <{ -+ // CHECK: "vhlo.broadcast_in_dim_v1"(%[[ARG0]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1> - %0 = "stablehlo.broadcast_in_dim"(%arg0) { -@@ -959,8 +1026,9 @@ - } - - // CHECK-LABEL: "op_broadcast" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_broadcast(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.broadcast_v1"(%arg0) <{ -+ // CHECK: "vhlo.broadcast_v1"(%[[ARG0]]) <{ - // CHECK-SAME: broadcast_sizes = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1> - %0 = "stablehlo.broadcast"(%arg0) { -@@ -970,9 +1038,10 @@ - } - - // CHECK-LABEL: "op_case" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_case(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.case_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.case"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -981,22 +1050,25 @@ - } - - // CHECK-LABEL: "op_cbrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cbrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cbrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.cbrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cbrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_ceil" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_ceil(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.ceil_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.ceil_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.ceil"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_cholesky" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> { -- // CHECK: "vhlo.cholesky_v1"(%arg0) <{ -+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{ - // CHECK-SAME: lower = #vhlo.bool_v1 - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1> - %0 = "stablehlo.cholesky"(%arg0) { -@@ -1006,22 
+1078,25 @@ - } - - // CHECK-LABEL: "op_clamp" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_clamp(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.clamp_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.clamp_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.clamp"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_count_leading_zeros" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_count_leading_zeros(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.count_leading_zeros_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.count_leading_zeros_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.count_leading_zeros"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_collective_permute" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: "vhlo.collective_permute_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -1033,8 +1108,9 @@ - } - - // CHECK-LABEL: "op_compare" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_compare(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: compare_type = #vhlo, - // CHECK-SAME: comparison_direction = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -1046,15 +1122,17 @@ - } - - // CHECK-LABEL: "op_complex" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_complex(%arg0: tensor, %arg1: tensor) -> tensor> { -- // CHECK: "vhlo.complex_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.complex_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1> - %0 = "stablehlo.complex"(%arg0, %arg1) : (tensor, tensor) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "op_composite" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_composite(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.composite_v1"(%arg0) <{ -+ // CHECK: "vhlo.composite_v1"(%[[ARG0]]) <{ - // CHECK-SAME: composite_attributes = #vhlo.dict_v1<{#vhlo.string_v1<"my_int"> = #vhlo.integer_v1<1 : i64>, #vhlo.string_v1<"my_string"> = #vhlo.string_v1<"foo">}> - // CHECK-SAME: decomposition = #vhlo.string_v1<"composite_target"> - // CHECK-SAME: name = #vhlo.string_v1<"stablehlo.composite_target"> -@@ -1073,8 +1151,9 @@ - } - - // CHECK-LABEL: "op_concatenate" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_concatenate(%arg0: tensor<8xf32>, %arg1: tensor<8xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.concatenate_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.concatenate_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x!vhlo.f32_v1>, !vhlo.tensor_v1<8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.concatenate"(%arg0, %arg1) 
{ -@@ -1084,6 +1163,7 @@ - } - - // CHECK-LABEL: "op_constant" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_constant(%arg0: tensor) -> tensor { - // CHECK: "vhlo.constant_v1"() <{ - // CHECK-SAME: value = #vhlo.tensor_v1 : tensor> -@@ -1095,15 +1175,17 @@ - } - - // CHECK-LABEL: "op_convert" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_convert(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.convert"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_convolution" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x7x7x16xf32> { -- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -1137,8 +1219,9 @@ - } - - // CHECK-LABEL: "op_cosine" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cosine(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cosine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.cosine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cosine"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } -@@ -1151,8 +1234,9 @@ - } - - // CHECK-LABEL: "op_cross_replica_sum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cross_replica_sum(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cross-replica-sum_v1"(%arg0) <{ -+ // CHECK: "vhlo.cross-replica-sum_v1"(%[[ARG0]]) <{ - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cross-replica-sum"(%arg0) { -@@ -1162,8 +1246,9 @@ - } - - // CHECK-LABEL: "op_custom_call" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_custom_call(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.custom_call_v1"(%arg0) <{ -+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{ - // CHECK-SAME: api_version = #vhlo, - // CHECK-SAME: backend_config = #vhlo.string_v1<"\08\03\1A\02">, - // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">, -@@ -1194,15 +1279,17 @@ - } - - // CHECK-LABEL: "op_divide" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_divide(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.divide_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.divide_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.divide"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dot_general" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> { -- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, -@@ -1222,8 +1309,9 @@ - } - - // CHECK-LABEL: "op_dot" -+// CHECK-NEXT: (%[[ARG0:.*]]: 
{{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.dot"(%arg0, %arg1) { -@@ -1233,8 +1321,9 @@ - } - - // CHECK-LABEL: "op_dynamic_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1248,8 +1337,9 @@ - } - - // CHECK-LABEL: "op_dynamic_conv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> { -- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -1283,8 +1373,9 @@ - } - - // CHECK-LABEL: "op_dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> { -- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -1304,8 +1395,9 @@ - } - - // CHECK-LABEL: "op_dynamic_iota" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_dynamic_iota(%arg0: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_iota_v1"(%arg0) <{ -+ // CHECK: "vhlo.dynamic_iota_v1"(%[[ARG0]]) <{ - // CHECK-SAME: iota_dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_iota"(%arg0) { -@@ -1315,22 +1407,25 @@ - } - - // CHECK-LABEL: "op_dynamic_pad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_dynamic_pad(%arg0: tensor, %arg1: tensor, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>, %arg4: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_pad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.dynamic_pad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = 
"stablehlo.dynamic_pad"(%arg0, %arg1, %arg2, %arg3, %arg4) : (tensor, tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dynamic_reshape" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_reshape(%arg0: tensor<16xf32>, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_reshape_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.dynamic_reshape_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_reshape"(%arg0, %arg1) : (tensor<16xf32>, tensor<2xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dynamic_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_slice(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor<4xf32> { -- // CHECK: "vhlo.dynamic_slice_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_slice_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: slice_sizes = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<4x!vhlo.f32_v1> - %0 = "stablehlo.dynamic_slice"(%arg0, %arg1) { -@@ -1340,15 +1435,17 @@ - } - - // CHECK-LABEL: "op_dynamic_update_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_update_slice(%arg0: tensor<16xf32>, %arg1: tensor<4xf32>, %arg2: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.dynamic_update_slice_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> -+ // CHECK: "vhlo.dynamic_update_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.dynamic_update_slice"(%arg0, %arg1, %arg2) : (tensor<16xf32>, tensor<4xf32>, tensor) -> tensor<16xf32> - func.return %0 : tensor<16xf32> - } - - // CHECK-LABEL: "op_einsum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_einsum(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.einsum_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.einsum_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab,bc->ac"> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.einsum"(%arg0, %arg1) { -@@ -1358,22 +1455,25 @@ - } - - // CHECK-LABEL: "op_exponential_minus_one" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_exponential_minus_one(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.exponential_minus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.exponential_minus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.exponential_minus_one"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_exponential" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_exponential(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.exponential_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.exponential_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.exponential"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_fft" -+// 
CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { -- // CHECK: "vhlo.fft_v1"(%arg0) <{ -+ // CHECK: "vhlo.fft_v1"(%[[ARG0]]) <{ - // CHECK-SAME: fft_length = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: fft_type = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.complex_v1>) -> !vhlo.tensor_v1<16x!vhlo.complex_v1> -@@ -1385,8 +1485,9 @@ - } - - // CHECK-LABEL: "op_floor" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_floor(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.floor_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.floor_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.floor"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } -@@ -1399,16 +1500,17 @@ - // CHECK-SAME: sym_name = #vhlo.string_v1<"op_func">, - // CHECK-SAME: sym_visibility = #vhlo.string_v1<"private"> - // CHECK-SAME: }> ({ -- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1): -- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1): -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : () -> () - - func.return %arg0 : tensor - } - - // CHECK-LABEL: "op_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> { -- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -1430,8 +1532,9 @@ - } - - // CHECK-LABEL: "op_get_dimension_size" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_get_dimension_size(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.get_dimension_size_v1"(%arg0) <{ -+ // CHECK: "vhlo.get_dimension_size_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.get_dimension_size"(%arg0) { -@@ -1441,8 +1544,9 @@ - } - - // CHECK-LABEL: "op_get_tuple_element" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_get_tuple_element(%arg0: tuple, tensor>) -> tensor { -- // CHECK: "vhlo.get_tuple_element_v1"(%arg0) <{ -+ // CHECK: "vhlo.get_tuple_element_v1"(%[[ARG0]]) <{ - // CHECK-SAME: index = #vhlo.integer_v1<0 : i32> - // CHECK-SAME: }> : (!vhlo.tuple_v1, !vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.get_tuple_element"(%arg0) { -@@ -1452,11 +1556,12 @@ - } - - // CHECK-LABEL: "op_if" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_if(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.if_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.if_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }, { -- // CHECK-NEXT: "vhlo.return_v1"(%arg2) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG2]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.if"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -1467,15 +1572,17 @@ - } - - // CHECK-LABEL: "op_imag" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_imag(%arg0: tensor>) -> tensor { -- // CHECK: 
"vhlo.imag_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.imag_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.imag"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_infeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.infeed_v1"(%arg0) <{ -+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{ - // CHECK-SAME: infeed_config = #vhlo.string_v1<"foo">, - // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[#vhlo.array_v1<[]>]> - // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) -@@ -1498,36 +1605,41 @@ - } - - // CHECK-LABEL: "op_is_finite" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_is_finite(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.is_finite_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.is_finite_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.is_finite"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_log" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_log(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.log_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.log_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.log"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_log_plus_one" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_log_plus_one(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.log_plus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.log_plus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.log_plus_one"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_logistic" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_logistic(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.logistic_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.logistic_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.logistic"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_map" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_map(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.map_v1"(%arg0) <{ -+ // CHECK: "vhlo.map_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> ({ - // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1): -@@ -1545,57 +1657,65 @@ - } - - // CHECK-LABEL: "op_maximum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_maximum(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.maximum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.maximum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.maximum"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_minimum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_minimum(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.minimum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.minimum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.minimum"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_multiply" -+// 
CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_multiply(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.multiply_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.multiply_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.multiply"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_negate" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_negate(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.negate_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.negate_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.negate"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_not" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_not(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.not_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.not_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.not"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_optimization_barrier" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_optimization_barrier(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.optimization_barrier_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.optimization_barrier_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.optimization_barrier"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_or" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_or(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.or_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.or_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.or"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_outfeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: outfeed_config = #vhlo.string_v1<"foo"> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.outfeed"(%arg0, %arg1) { -@@ -1605,8 +1725,9 @@ - } - - // CHECK-LABEL: "op_pad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_pad(%arg0: tensor<8xf32>, %arg1: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.pad_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.pad_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: edge_padding_high = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: edge_padding_low = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: interior_padding = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1620,36 +1741,41 @@ - } - - // CHECK-LABEL: "op_popcnt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_popcnt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.popcnt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.popcnt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.popcnt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_power" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_power(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: 
"vhlo.power_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.power_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.power"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_real_dynamic_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}) - func.func @op_real_dynamic_slice(%arg0: tensor, %arg1: tensor<1xindex>, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.real_dynamic_slice_v1"(%arg0, %arg1, %arg2, %arg3) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.real_dynamic_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.real_dynamic_slice"(%arg0, %arg1, %arg2, %arg3) : (tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_real" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_real(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.real_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.real_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.real"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_recv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.recv_v1"(%arg0) <{ -+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<3 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -1662,8 +1788,9 @@ - } - - // CHECK-LABEL: "op_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_reduce(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.reduce_v1"(%arg0, %arg1) -+ // CHECK: "vhlo.reduce_v1"(%[[ARG0]], %[[ARG1]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -1678,8 +1805,9 @@ - } - - // CHECK-LABEL: "op_reduce_precision" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reduce_precision(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.reduce_precision_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_precision_v1"(%[[ARG0]]) <{ - // CHECK-SAME: exponent_bits = #vhlo.integer_v1<8 : i32> - // CHECK-SAME: mantissa_bits = #vhlo.integer_v1<10 : i32> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -1690,10 +1818,10 @@ - func.return %0 : tensor - } - --// CHECK_lABEL: "op_reduce_with_promotable_types" -+// CHECK-LABEL: "op_reduce_with_promotable_types" - func.func @op_reduce_with_promotable_types(%arg0: tensor<4x4xf32>, %arg1 : tensor) - -> (tensor<4xf64>) { -- // CHECK: "vhlo.reduce_v1"(%arg0, %arg1) -+ // CHECK: "vhlo.reduce_v1"(%[[ARG0:.*]], %[[ARG1:.*]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1<4x4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> 
!vhlo.tensor_v1<4x!vhlo.f64_v1> -@@ -1708,8 +1836,9 @@ - } - - // CHECK-LABEL: "op_reduce_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64> -@@ -1734,7 +1863,7 @@ - - // CHECK_lABEL: "op_reduce_scatter_with_promotable_types" - func.func @op_reduce_scatter_with_promotable_types(%data: tensor<4x16xf32>) -> tensor<4x4xf64> { -- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) -+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0:.*]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1<4x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f64_v1> -@@ -1751,8 +1880,9 @@ - - - // CHECK-LABEL: "op_reduce_window" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x9x16x7xf32> { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, -@@ -1781,7 +1911,7 @@ - func.func @op_reduce_window_with_promotable_types(%arg0: tensor<4x2xf32>, - %arg1: tensor<4x2xf32>, %init0: tensor, %init1: tensor) -> - (tensor<2x2xf64>, tensor<2x2xf32>) { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1, %arg2, %arg3) -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0:.*]], %[[ARG1:.*]], %[[ARG2:.*]], %[[ARG3:.*]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1, %[[ARG3:arg.*]]: !vhlo.tensor_v1, %[[ARG4:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]], %[[VAL2:.*]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1<4x2x!vhlo.f32_v1>, !vhlo.tensor_v1<4x2x!vhlo.f32_v1>, !vhlo.tensor_v1, !vhlo.tensor_v1) -> (!vhlo.tensor_v1<2x2x!vhlo.f64_v1>, !vhlo.tensor_v1<2x2x!vhlo.f32_v1>) -@@ -1801,8 +1931,9 @@ - } - - // CHECK-LABEL: "op_remainder" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_remainder(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.remainder_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.remainder_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.remainder"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } -@@ -1822,16 +1953,18 @@ - } - - // CHECK-LABEL: "op_reshape" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reshape(%arg0: tensor<16xf32>) -> tensor<4x4xf32> { -- // CHECK: "vhlo.reshape_v1"(%arg0) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1> -+ // CHECK: "vhlo.reshape_v1"(%[[ARG0]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1> - %0 = "stablehlo.reshape"(%arg0) : (tensor<16xf32>) -> tensor<4x4xf32> - func.return %0 : tensor<4x4xf32> - } - - // CHECK-LABEL: "op_return" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func 
@op_return(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.case_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.case"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -1840,8 +1973,9 @@ - } - - // CHECK-LABEL: "op_reverse" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reverse(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reverse_v1"(%arg0) <{ -+ // CHECK: "vhlo.reverse_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.reverse"(%arg0) { -@@ -1851,8 +1985,9 @@ - } - - // CHECK-LABEL: "op_rng_bit_generator" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_rng_bit_generator(%arg0: tensor) -> (tensor, tensor) { -- // CHECK: "vhlo.rng_bit_generator_v1"(%arg0) <{ -+ // CHECK: "vhlo.rng_bit_generator_v1"(%[[ARG0]]) <{ - // CHECK-SAME: rng_algorithm = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> (!vhlo.tensor_v1, !vhlo.tensor_v1) - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { -@@ -1862,8 +1997,9 @@ - } - - // CHECK-LABEL: "op_rng" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_rng(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { -- // CHECK: "vhlo.rng_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.rng_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: rng_distribution = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<0x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) { -@@ -1873,29 +2009,33 @@ - } - - // CHECK-LABEL: "op_round_nearest_afz" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_round_nearest_afz(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.round_nearest_afz_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.round_nearest_afz_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.round_nearest_afz"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_round_nearest_even" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_round_nearest_even(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.round_nearest_even_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.round_nearest_even_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.round_nearest_even"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_rsqrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_rsqrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.rsqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.rsqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.rsqrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> { -- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: indices_are_sorted = 
#vhlo.bool_v1, - // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>, -@@ -1928,7 +2068,7 @@ - func.func @op_scatter_with_promotable_types(%input_tensor: tensor<200x100x300xf32>, - %scatter_indices: tensor<10x2xi32>, %updates: tensor<10x300xf32>) -> - tensor<200x100x300xf64> { -- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) -+ // CHECK: "vhlo.scatter_v1"(%[[ARG0:.*]], %[[ARG1:.*]], %[[ARG2:.*]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1<200x100x300x!vhlo.f32_v1>, !vhlo.tensor_v1<10x2x!vhlo.i32_v1>, !vhlo.tensor_v1<10x300x!vhlo.f32_v1>) -> !vhlo.tensor_v1<200x100x300x!vhlo.f64_v1> -@@ -1951,8 +2091,9 @@ - } - - // CHECK-LABEL: "op_select_and_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<12x13x13x66xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> { -- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>> -@@ -1982,8 +2123,9 @@ - } - - // CHECK-LABEL: "op_select_and_scatter_with_promotable_types" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_select_and_scatter_with_promotable_types(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<12x13x13x66xf32>, %arg2: tensor) -> tensor<10x24x24x64xf64> { -- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) -+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: %[[VAL:.*]] = "vhlo.add_v1"(%[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - // CHECK: "vhlo.return_v1"(%[[VAL]]) : (!vhlo.tensor_v1) -> () -@@ -2005,15 +2147,17 @@ - } - - // CHECK-LABEL: "op_select" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_select(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.select_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.select_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.select"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_send" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -2026,8 +2170,9 @@ - } - - // CHECK-LABEL: "op_set_dimension_size" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_set_dimension_size(%arg0: tensor, %arg1: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.set_dimension_size_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.set_dimension_size_v1"(%[[ARG0]], %[[ARG1]]) <{ - // 
CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.set_dimension_size"(%arg0, %arg1) { -@@ -2037,43 +2182,49 @@ - } - - // CHECK-LABEL: "op_shift_left" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_left(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.shift_left_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_left_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_left"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_shift_right_arithmetic" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_right_arithmetic(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.shift_right_arithmetic_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_right_arithmetic_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_right_arithmetic"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_shift_right_logical" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_right_logical(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.shift_right_logical_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_right_logical_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_right_logical"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_sign" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sign(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.sign_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.sign_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.sign"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_sine" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sine(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.sine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.sine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.sine"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_slice(%arg0: tensor<16xf32>) -> tensor<4xf32> { -- // CHECK: "vhlo.slice_v1"(%arg0) <{ -+ // CHECK: "vhlo.slice_v1"(%[[ARG0]]) <{ - // CHECK-SAME: limit_indices = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: start_indices = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: strides = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -2087,8 +2238,9 @@ - } - - // CHECK-LABEL: "op_sort" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.sort_v1"(%arg0) <{ -+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: is_stable = #vhlo.bool_v1 - // CHECK-SAME: }> ({ -@@ -2108,29 +2260,33 @@ - } - - // CHECK-LABEL: "op_sqrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sqrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.sqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.sqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> 
!vhlo.tensor_v1 - %0 = "stablehlo.sqrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_subtract" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_subtract(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.subtract_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.subtract_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.subtract"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_tanh" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_tanh(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.tanh_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.tanh_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.tanh"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_torch_index_select" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_torch_index_select(%arg0: tensor<5x1x5xf32>, %arg1: tensor<2xi32>) -> tensor<2x1x5xf32> { -- // CHECK: "vhlo.torch_index_select_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.torch_index_select_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_dims = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: dim = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<5x1x5x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.i32_v1>) -> !vhlo.tensor_v1<2x1x5x!vhlo.f32_v1> -@@ -2142,8 +2298,9 @@ - } - - // CHECK-LABEL: "op_trace" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_trace(%arg0: tensor) { -- // CHECK: "vhlo.trace_v1"(%arg0) <{ -+ // CHECK: "vhlo.trace_v1"(%[[ARG0]]) <{ - // CHECK-SAME: tag = #vhlo.string_v1<"foo"> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> () - "stablehlo.trace"(%arg0) { -@@ -2153,8 +2310,9 @@ - } - - // CHECK-LABEL: "op_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_transpose(%arg0: tensor<16x8xf32>) -> tensor<8x16xf32> { -- // CHECK: "vhlo.transpose_v1"(%arg0) <{ -+ // CHECK: "vhlo.transpose_v1"(%[[ARG0]]) <{ - // CHECK-SAME: permutation = #vhlo.tensor_v1 : tensor<2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x16x!vhlo.f32_v1> - %0 = "stablehlo.transpose"(%arg0) { -@@ -2164,8 +2322,9 @@ - } - - // CHECK-LABEL: "op_triangular_solve" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_triangular_solve(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.triangular_solve_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.triangular_solve_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: left_side = #vhlo.bool_v1, - // CHECK-SAME: lower = #vhlo.bool_v1, - // CHECK-SAME: transpose_a = #vhlo, -@@ -2181,15 +2340,17 @@ - } - - // CHECK-LABEL: "op_tuple" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_tuple(%arg0: tensor) -> tuple> { -- // CHECK: "vhlo.tuple_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1> -+ // CHECK: "vhlo.tuple_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1> - %0 = "stablehlo.tuple"(%arg0) : (tensor) -> tuple> - func.return %0 : tuple> - } - - // CHECK-LABEL: "op_unary_einsum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_unary_einsum(%arg0: tensor<8x16xf32>) -> tensor<8xf32> { -- // CHECK: "vhlo.unary_einsum_v1"(%arg0) <{ -+ // CHECK: "vhlo.unary_einsum_v1"(%[[ARG0]]) <{ - // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab->a"> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>) -> 
!vhlo.tensor_v1<8x!vhlo.f32_v1> - %0 = "stablehlo.unary_einsum"(%arg0) { -@@ -2199,22 +2360,25 @@ - } - - // CHECK-LABEL: "op_uniform_dequantize" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_uniform_dequantize(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.uniform_dequantize_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.uniform_dequantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.uniform_dequantize"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_uniform_quantize" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_uniform_quantize(%arg0: tensor) -> tensor> { -- // CHECK: "vhlo.uniform_quantize_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.uniform_quantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1> - %0 = "stablehlo.uniform_quantize"(%arg0) : (tensor) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "op_while" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_while(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.while_v1"(%arg0) ({ -+ // CHECK: "vhlo.while_v1"(%[[ARG0]]) ({ - // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1): - // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }, { -@@ -2232,8 +2396,9 @@ - } - - // CHECK-LABEL: "op_xor" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_xor(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.xor_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.xor_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.xor"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } -@@ -2241,197 +2406,225 @@ - // ============ TYPES ============ - - // CHECK-LABEL: "type_i1" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i1(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i4" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i4(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i8" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i8(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> 
!vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i32(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i64(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui4" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui4(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui8" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui8(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui32(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui64(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E4M3FN" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E4M3FN(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> 
!vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E5M2" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E5M2(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E4M3FNUZ" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E4M3FNUZ(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E4M3B11FNUZ" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E4M3B11FNUZ(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E5M2FNUZ" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E5M2FNUZ(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_bf16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_bf16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f32(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - 
func.func @type_f64(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_complex_f32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_complex_f32(%arg0: tensor>, %arg1: tensor>) -> tensor> { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "type_complex_f64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_complex_f64(%arg0: tensor>, %arg1: tensor>) -> tensor> { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "type_dynamism_ranked" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_dynamism_ranked(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_per_tensor_quantization" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_per_tensor_quantization(%arg0: tensor>, %arg1: tensor>) -> tensor> { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "type_per_axis_quantization" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_per_axis_quantization(%arg0: tensor<2x!quant.uniform>) -> tensor<2x!quant.uniform> { -- // CHECK: "vhlo.add_v1"(%arg0, %arg0) : (!vhlo.tensor_v1<2x!vhlo.quant_per_axis_v1>, !vhlo.tensor_v1<2x!vhlo.quant_per_axis_v1>) -> !vhlo.tensor_v1<2x!vhlo.quant_per_axis_v1> -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG0]]) : (!vhlo.tensor_v1<2x!vhlo.quant_per_axis_v1>, !vhlo.tensor_v1<2x!vhlo.quant_per_axis_v1>) -> !vhlo.tensor_v1<2x!vhlo.quant_per_axis_v1> - %0 = stablehlo.add %arg0, %arg0 : tensor<2x!quant.uniform> - func.return %0 : tensor<2x!quant.uniform> - } - - // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>> - // CHECK-LABEL: "type_token_callee" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_token_callee(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.token_v1) -> () -+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> () - return %arg0 : !stablehlo.token - } - - // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>> - // CHECK-LABEL: "type_token_caller" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_token_caller(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.call_v1"(%arg0) <{callee = 
#vhlo.string_v1<"type_token_callee">} -+ // CHECK: "vhlo.call_v1"(%[[ARG0]]) <{callee = #vhlo.string_v1<"type_token_callee">} - // CHECK-SAME: (!vhlo.token_v1) -> !vhlo.token_v1 - %0 = func.call @type_token_callee(%arg0) : (!stablehlo.token) -> !stablehlo.token - return %0 : !stablehlo.token - } - - // CHECK-LABEL: "type_tuple" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_tuple(%arg0: tuple>) -> tuple { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo" -diff --ruN a/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_9_0.mlir b/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_9_0.mlir ---- stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_9_0.mlir -+++ stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.0_9_0.mlir -@@ -13,6 +13,7 @@ - // ============ ATTRIBUTES ============ - - // CHECK-LABEL: "attr_comparison_direction_eq" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_eq(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -22,6 +23,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_ne" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_ne(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -31,6 +33,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_ge" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_ge(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -40,6 +43,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_gt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_gt(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -49,6 +53,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_le" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_le(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -58,6 +63,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_lt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_lt(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -67,6 +73,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_notype" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_notype(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo -@@ -76,6 +83,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_float" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_float(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -86,6 +94,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_totalorder" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_totalorder(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ 
-96,6 +105,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_signed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_signed(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -106,6 +116,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_unsigned" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_unsigned(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -118,6 +129,7 @@ - // ConvDimensionNumbers aka #stablehlo.conv is covered below. - - // CHECK-LABEL: "attr_custom_call_api_version_unspecified" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_unspecified(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -128,6 +140,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_original" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_original(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -138,6 +151,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_status_returning" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_status_returning(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -148,6 +162,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_status_returning_unified" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_status_returning_unified(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -160,6 +175,7 @@ - // DotDimensionNumbers aka #stablehlo.dot is covered below. - - // CHECK-LABEL: "attr_fft_type_fft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -170,6 +186,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_ifft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_ifft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -180,6 +197,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_rfft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_rfft(%arg0: tensor<16xf32>) -> tensor<9xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -190,6 +208,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_irfft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_irfft(%arg0: tensor<9xcomplex>) -> tensor<16xf32> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -202,6 +221,7 @@ - // GatherDimensionNumbers aka #stablehlo.gather is covered below. 
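These hunks all apply one mechanical pattern: after each // CHECK-LABEL, a new // CHECK-NEXT line binds the function's block arguments to FileCheck variables (%[[ARG0:.*]]), and the later // CHECK lines reference %[[ARG0]] instead of the hardcoded SSA name %arg0. The capture keeps the expectations valid even if the serialized-and-reparsed IR numbers or names its block arguments differently. A minimal sketch of the pattern, using a hypothetical @example function (the name and the f32 types are illustrative, not taken from the patch):

  // CHECK-LABEL: "example"
  // CHECK-NEXT: (%[[ARG0:.*]]: {{.*}})
  func.func @example(%arg0: tensor<f32>) -> tensor<f32> {
    // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1<!vhlo.f32_v1>) -> !vhlo.tensor_v1<!vhlo.f32_v1>
    %0 = "stablehlo.abs"(%arg0) : (tensor<f32>) -> tensor<f32>
    func.return %0 : tensor<f32>
  }

The {{.*}} placeholder deliberately ignores the argument's printed type; only the SSA name needs to be captured for later lines to use.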
- - // CHECK-LABEL: "attr_precision_config_default" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_default(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -210,6 +230,7 @@ - } - - // CHECK-LABEL: "attr_precision_config_high" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_high(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -219,6 +240,7 @@ - } - - // CHECK-LABEL: "attr_precision_config_highest" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_highest(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -228,6 +250,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_default" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_default(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -237,6 +260,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_three_fry" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_three_fry(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -246,6 +270,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_philox" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_philox(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -255,6 +280,7 @@ - } - - // CHECK-LABEL: "attr_rng_distribution_uniform" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @attr_rng_distribution_uniform(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { - %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) { - // CHECK: rng_distribution = #vhlo -@@ -264,6 +290,7 @@ - } - - // CHECK-LABEL: "attr_rng_distribution_normal" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @attr_rng_distribution_normal(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { - %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) { - // CHECK: rng_distribution = #vhlo -@@ -275,6 +302,7 @@ - // ScatterDimensionNumbers aka #stablehlo.scatter is covered below. 
- - // CHECK-LABEL: "attr_transpose_no_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_no_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -287,6 +315,7 @@ - } - - // CHECK-LABEL: "attr_transpose_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -299,6 +328,7 @@ - } - - // CHECK-LABEL: "attr_transpose_adjoint" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_adjoint(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -313,10 +343,9 @@ - // TypeExtensionsAttr aka #stablehlo.type_extensions is covered below. - - // CHECK-LABEL: "attr_type_extensions_bounds" --func.func @attr_type_extensions_bounds( -- %arg0: tensor>) -- -> tensor> { -- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1>) -> () -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) -+func.func @attr_type_extensions_bounds(%arg0: tensor>) -> tensor> { -+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> () - func.return %arg0 : tensor> - } - -@@ -324,8 +353,9 @@ - // ============ DEFAULTS ============ - - // CHECK-LABEL: "default_all_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.all_gather_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{ - // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64> - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -339,8 +369,9 @@ - } - - // CHECK-LABEL: "default_all_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_reduce(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.all_reduce_v1"(%arg0) -+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]]) - // CHECK-SAME: <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -362,8 +393,9 @@ - } - - // CHECK-LABEL: "default_all_to_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> { -- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>, -@@ -380,8 +412,9 @@ - } - - // CHECK-LABEL: "default_cholesky" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> { -- // CHECK: "vhlo.cholesky_v1"(%arg0) <{ -+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{ - // CHECK-SAME: lower = #vhlo.bool_v1 - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1> - %0 = "stablehlo.cholesky"(%arg0) : (tensor<1x16x16xf32>) -> tensor<1x16x16xf32> -@@ -389,8 +422,9 @@ - } - - // CHECK-LABEL: "default_collective_permute" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: 
"vhlo.collective_permute_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -401,8 +435,9 @@ - } - - // CHECK-LABEL: "default_compare" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_compare(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: compare_type = #vhlo, - // CHECK-SAME: comparison_direction = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -413,8 +448,9 @@ - } - - // CHECK-LABEL: "default_convolution" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x6x6x16xf32> { -- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -442,8 +478,9 @@ - } - - // CHECK-LABEL: "default_custom_call" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_custom_call(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.custom_call_v1"(%arg0) <{ -+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{ - // CHECK-SAME: api_version = #vhlo, - // CHECK-SAME: backend_config = #vhlo.string_v1<"">, - // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">, -@@ -460,8 +497,9 @@ - } - - // CHECK-LABEL: "default_dot_general" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> { -- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, -@@ -480,8 +518,9 @@ - } - - // CHECK-LABEL: "default_dot" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.dot"(%arg0, %arg1) : (tensor<8x16xf32>, tensor<16x8xf32>) -> tensor<8x8xf32> -@@ -489,8 +528,9 @@ - } - - // CHECK-LABEL: "default_dynamic_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>>, - // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : 
tensor<0xi64>> -@@ -502,8 +542,9 @@ - } - - // CHECK-LABEL: "default_dynamic_conv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> { -- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -531,8 +572,9 @@ - } - - // CHECK-LABEL: "default_dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> { -- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -558,15 +600,16 @@ - // CHECK-SAME: sym_name = #vhlo.string_v1<"default_func">, - // CHECK-SAME: sym_visibility = #vhlo.string_v1<""> - // CHECK-SAME: }> ({ -- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1): -- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1): -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : () -> () - func.return %arg0 : tensor - } - - // CHECK-LABEL: "dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> { -- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -587,8 +630,9 @@ - } - - // CHECK-LABEL: "default_infeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.infeed_v1"(%arg0) <{ -+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{ - // CHECK-SAME: infeed_config = #vhlo.string_v1<"">, - // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[]> - // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) -@@ -597,8 +641,9 @@ - } - - // CHECK-LABEL: "default_outfeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: outfeed_config = #vhlo.string_v1<""> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.outfeed"(%arg0, %arg1) : (tensor, !stablehlo.token) -> !stablehlo.token -@@ -606,8 +651,9 @@ - } - - // CHECK-LABEL: "default_recv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.recv_v1"(%arg0) <{ -+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // 
CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -619,8 +665,9 @@ - } - - // CHECK-LABEL: "default_send" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -632,8 +679,9 @@ - } - - // CHECK-LABEL: "default_reduce_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64> -@@ -655,8 +703,9 @@ - } - - // CHECK-LABEL: "default_reduce_window" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x16x30x7xf32> { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, -@@ -678,8 +727,9 @@ - } - - // CHECK-LABEL: "default_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> { -- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, - // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>, -@@ -707,8 +757,9 @@ - } - - // CHECK-LABEL: "default_select_and_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<10x23x23x64xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> { -- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>> -@@ -736,8 +787,9 @@ - } - - // CHECK-LABEL: "default_sort" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.sort_v1"(%arg0) <{ -+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<-1 : i64> - // CHECK-SAME: is_stable = #vhlo.bool_v1 - // CHECK-SAME: }> ({ -@@ -756,29 +808,33 @@ - // ============ OPS ============ - - // CHECK-LABEL: "op_abs" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_abs(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : 
(!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_add" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_add(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_after_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_after_all(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.after_all_v1"(%arg0) : (!vhlo.token_v1) -> !vhlo.token_v1 -+ // CHECK: "vhlo.after_all_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.after_all"(%arg0) : (!stablehlo.token) -> !stablehlo.token - func.return %0 : !stablehlo.token - } - - // CHECK-LABEL: "op_all_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.all_gather_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{ - // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64> - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -794,8 +850,9 @@ - } - - // CHECK-LABEL: "op_all_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_reduce(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.all_reduce_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: use_global_device_ids = #vhlo.bool_v1 -@@ -817,8 +874,9 @@ - } - - // CHECK-LABEL: "op_all_to_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> { -- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>, -@@ -836,22 +894,25 @@ - } - - // CHECK-LABEL: "op_and" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_and(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_atan2" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_atan2(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.atan2_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.atan2_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.atan2"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_batch_norm_grad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_batch_norm_grad(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, 
%arg3: tensor<16xf32>, %arg4: tensor<16x16x16x16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { -- // CHECK: "vhlo.batch_norm_grad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ -+ // CHECK: "vhlo.batch_norm_grad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -@@ -863,8 +924,9 @@ - } - - // CHECK-LABEL: "op_batch_norm_inference" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_batch_norm_inference(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16xf32>) -> tensor<16x16x16x16xf32> { -- // CHECK: "vhlo.batch_norm_inference_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ -+ // CHECK: "vhlo.batch_norm_inference_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1> -@@ -876,8 +938,9 @@ - } - - // CHECK-LABEL: "op_batch_norm_training" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_batch_norm_training(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { -- // CHECK: "vhlo.batch_norm_training_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.batch_norm_training_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -@@ -889,15 +952,17 @@ - } - - // CHECK-LABEL: "op_bitcast_convert" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_bitcast_convert(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.bitcast_convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.bitcast_convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.bitcast_convert"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_broadcast_in_dim(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.broadcast_in_dim_v1"(%arg0) <{ -+ // CHECK: "vhlo.broadcast_in_dim_v1"(%[[ARG0]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1> - %0 = "stablehlo.broadcast_in_dim"(%arg0) { -@@ -907,8 +972,9 @@ - } - - // CHECK-LABEL: "op_broadcast" -+// CHECK-NEXT: 
(%[[ARG0:.*]]: {{.*}}) - func.func @op_broadcast(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.broadcast_v1"(%arg0) <{ -+ // CHECK: "vhlo.broadcast_v1"(%[[ARG0]]) <{ - // CHECK-SAME: broadcast_sizes = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1> - %0 = "stablehlo.broadcast"(%arg0) { -@@ -918,9 +984,10 @@ - } - - // CHECK-LABEL: "op_case" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_case(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.case_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.case"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -929,22 +996,25 @@ - } - - // CHECK-LABEL: "op_cbrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cbrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cbrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.cbrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cbrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_ceil" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_ceil(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.ceil_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.ceil_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.ceil"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_cholesky" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> { -- // CHECK: "vhlo.cholesky_v1"(%arg0) <{ -+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{ - // CHECK-SAME: lower = #vhlo.bool_v1 - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1> - %0 = "stablehlo.cholesky"(%arg0) { -@@ -954,22 +1024,25 @@ - } - - // CHECK-LABEL: "op_clamp" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_clamp(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.clamp_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.clamp_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.clamp"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_count_leading_zeros" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_count_leading_zeros(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.count_leading_zeros_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.count_leading_zeros_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.count_leading_zeros"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_collective_permute" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: "vhlo.collective_permute_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>> - // CHECK-SAME: }> : 
(!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -981,8 +1054,9 @@ - } - - // CHECK-LABEL: "op_compare" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_compare(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: compare_type = #vhlo, - // CHECK-SAME: comparison_direction = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -994,15 +1068,17 @@ - } - - // CHECK-LABEL: "op_complex" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_complex(%arg0: tensor, %arg1: tensor) -> tensor> { -- // CHECK: "vhlo.complex_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.complex_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1> - %0 = "stablehlo.complex"(%arg0, %arg1) : (tensor, tensor) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "op_concatenate" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_concatenate(%arg0: tensor<8xf32>, %arg1: tensor<8xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.concatenate_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.concatenate_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x!vhlo.f32_v1>, !vhlo.tensor_v1<8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.concatenate"(%arg0, %arg1) { -@@ -1012,6 +1088,7 @@ - } - - // CHECK-LABEL: "op_constant" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_constant(%arg0: tensor) -> tensor { - // CHECK: "vhlo.constant_v1"() <{ - // CHECK-SAME: value = #vhlo.tensor_v1 : tensor> -@@ -1023,15 +1100,17 @@ - } - - // CHECK-LABEL: "op_convert" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_convert(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.convert"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_convolution" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x7x7x16xf32> { -- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -1065,8 +1144,9 @@ - } - - // CHECK-LABEL: "op_cosine" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cosine(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cosine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.cosine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cosine"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } -@@ -1079,8 +1159,9 @@ - } - - // CHECK-LABEL: "op_cross_replica_sum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cross_replica_sum(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cross-replica-sum_v1"(%arg0) <{ -+ // CHECK: "vhlo.cross-replica-sum_v1"(%[[ARG0]]) <{ - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = 
"stablehlo.cross-replica-sum"(%arg0) { -@@ -1090,8 +1171,9 @@ - } - - // CHECK-LABEL: "op_custom_call" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_custom_call(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.custom_call_v1"(%arg0) <{ -+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{ - // CHECK-SAME: api_version = #vhlo, - // CHECK-SAME: backend_config = #vhlo.string_v1<"\08\03\1A\02">, - // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">, -@@ -1122,15 +1204,17 @@ - } - - // CHECK-LABEL: "op_divide" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_divide(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.divide_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.divide_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.divide"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dot_general" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> { -- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, -@@ -1150,8 +1234,9 @@ - } - - // CHECK-LABEL: "op_dot" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.dot"(%arg0, %arg1) { -@@ -1161,8 +1246,9 @@ - } - - // CHECK-LABEL: "op_dynamic_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1176,8 +1262,9 @@ - } - - // CHECK-LABEL: "op_dynamic_conv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> { -- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -1211,8 +1298,9 @@ - } - - // CHECK-LABEL: "op_dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> { -- // CHECK: 
"vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -1232,8 +1320,9 @@ - } - - // CHECK-LABEL: "op_dynamic_iota" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_dynamic_iota(%arg0: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_iota_v1"(%arg0) <{ -+ // CHECK: "vhlo.dynamic_iota_v1"(%[[ARG0]]) <{ - // CHECK-SAME: iota_dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_iota"(%arg0) { -@@ -1243,22 +1332,25 @@ - } - - // CHECK-LABEL: "op_dynamic_pad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_dynamic_pad(%arg0: tensor, %arg1: tensor, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>, %arg4: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_pad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.dynamic_pad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_pad"(%arg0, %arg1, %arg2, %arg3, %arg4) : (tensor, tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dynamic_reshape" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_reshape(%arg0: tensor<16xf32>, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_reshape_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.dynamic_reshape_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_reshape"(%arg0, %arg1) : (tensor<16xf32>, tensor<2xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dynamic_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_slice(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor<4xf32> { -- // CHECK: "vhlo.dynamic_slice_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_slice_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: slice_sizes = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<4x!vhlo.f32_v1> - %0 = "stablehlo.dynamic_slice"(%arg0, %arg1) { -@@ -1268,15 +1360,17 @@ - } - - // CHECK-LABEL: "op_dynamic_update_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_update_slice(%arg0: tensor<16xf32>, %arg1: tensor<4xf32>, %arg2: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.dynamic_update_slice_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> -+ // CHECK: "vhlo.dynamic_update_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> 
!vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.dynamic_update_slice"(%arg0, %arg1, %arg2) : (tensor<16xf32>, tensor<4xf32>, tensor) -> tensor<16xf32> - func.return %0 : tensor<16xf32> - } - - // CHECK-LABEL: "op_einsum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_einsum(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.einsum_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.einsum_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab,bc->ac"> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.einsum"(%arg0, %arg1) { -@@ -1286,22 +1380,25 @@ - } - - // CHECK-LABEL: "op_exponential_minus_one" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_exponential_minus_one(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.exponential_minus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.exponential_minus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.exponential_minus_one"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_exponential" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_exponential(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.exponential_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.exponential_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.exponential"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_fft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { -- // CHECK: "vhlo.fft_v1"(%arg0) <{ -+ // CHECK: "vhlo.fft_v1"(%[[ARG0]]) <{ - // CHECK-SAME: fft_length = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: fft_type = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.complex_v1>) -> !vhlo.tensor_v1<16x!vhlo.complex_v1> -@@ -1313,8 +1410,9 @@ - } - - // CHECK-LABEL: "op_floor" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_floor(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.floor_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.floor_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.floor"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } -@@ -1327,16 +1425,17 @@ - // CHECK-SAME: sym_name = #vhlo.string_v1<"op_func">, - // CHECK-SAME: sym_visibility = #vhlo.string_v1<"private"> - // CHECK-SAME: }> ({ -- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1): -- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1): -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : () -> () - - func.return %arg0 : tensor - } - - // CHECK-LABEL: "op_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> { -- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -1358,8 +1457,9 @@ - } - - // CHECK-LABEL: "op_get_dimension_size" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_get_dimension_size(%arg0: tensor) -> tensor { -- // CHECK: 
"vhlo.get_dimension_size_v1"(%arg0) <{ -+ // CHECK: "vhlo.get_dimension_size_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.get_dimension_size"(%arg0) { -@@ -1369,8 +1469,9 @@ - } - - // CHECK-LABEL: "op_get_tuple_element" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_get_tuple_element(%arg0: tuple, tensor>) -> tensor { -- // CHECK: "vhlo.get_tuple_element_v1"(%arg0) <{ -+ // CHECK: "vhlo.get_tuple_element_v1"(%[[ARG0]]) <{ - // CHECK-SAME: index = #vhlo.integer_v1<0 : i32> - // CHECK-SAME: }> : (!vhlo.tuple_v1, !vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.get_tuple_element"(%arg0) { -@@ -1380,11 +1481,12 @@ - } - - // CHECK-LABEL: "op_if" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_if(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.if_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.if_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }, { -- // CHECK-NEXT: "vhlo.return_v1"(%arg2) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG2]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.if"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -1395,15 +1497,17 @@ - } - - // CHECK-LABEL: "op_imag" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_imag(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.imag_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.imag_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.imag"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_infeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.infeed_v1"(%arg0) <{ -+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{ - // CHECK-SAME: infeed_config = #vhlo.string_v1<"foo">, - // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[#vhlo.array_v1<[]>]> - // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) -@@ -1426,36 +1530,41 @@ - } - - // CHECK-LABEL: "op_is_finite" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_is_finite(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.is_finite_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.is_finite_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.is_finite"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_log" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_log(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.log_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.log_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.log"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_log_plus_one" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_log_plus_one(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.log_plus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.log_plus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.log_plus_one"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_logistic" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_logistic(%arg0: tensor) -> tensor { -- // CHECK: 
"vhlo.logistic_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.logistic_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.logistic"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_map" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_map(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.map_v1"(%arg0) <{ -+ // CHECK: "vhlo.map_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> ({ - // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1): -@@ -1473,57 +1582,65 @@ - } - - // CHECK-LABEL: "op_maximum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_maximum(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.maximum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.maximum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.maximum"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_minimum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_minimum(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.minimum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.minimum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.minimum"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_multiply" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_multiply(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.multiply_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.multiply_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.multiply"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_negate" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_negate(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.negate_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.negate_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.negate"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_not" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_not(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.not_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.not_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.not"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_optimization_barrier" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_optimization_barrier(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.optimization_barrier_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.optimization_barrier_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.optimization_barrier"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_or" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_or(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.or_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.or_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = 
"stablehlo.or"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_outfeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: outfeed_config = #vhlo.string_v1<"foo"> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.outfeed"(%arg0, %arg1) { -@@ -1533,8 +1650,9 @@ - } - - // CHECK-LABEL: "op_pad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_pad(%arg0: tensor<8xf32>, %arg1: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.pad_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.pad_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: edge_padding_high = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: edge_padding_low = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: interior_padding = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1548,36 +1666,41 @@ - } - - // CHECK-LABEL: "op_popcnt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_popcnt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.popcnt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.popcnt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.popcnt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_power" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_power(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.power_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.power_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.power"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_real_dynamic_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}) - func.func @op_real_dynamic_slice(%arg0: tensor, %arg1: tensor<1xindex>, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.real_dynamic_slice_v1"(%arg0, %arg1, %arg2, %arg3) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.real_dynamic_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.real_dynamic_slice"(%arg0, %arg1, %arg2, %arg3) : (tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_real" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_real(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.real_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.real_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.real"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_recv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.recv_v1"(%arg0) <{ -+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<3 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ 
-1590,8 +1713,9 @@ - } - - // CHECK-LABEL: "op_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_reduce(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.reduce_v1"(%arg0, %arg1) -+ // CHECK: "vhlo.reduce_v1"(%[[ARG0]], %[[ARG1]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -1606,8 +1730,9 @@ - } - - // CHECK-LABEL: "op_reduce_precision" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reduce_precision(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.reduce_precision_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_precision_v1"(%[[ARG0]]) <{ - // CHECK-SAME: exponent_bits = #vhlo.integer_v1<8 : i32> - // CHECK-SAME: mantissa_bits = #vhlo.integer_v1<10 : i32> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -1619,8 +1744,9 @@ - } - - // CHECK-LABEL: "op_reduce_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64> -@@ -1644,8 +1770,9 @@ - } - - // CHECK-LABEL: "op_reduce_window" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x9x16x7xf32> { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, -@@ -1671,8 +1798,9 @@ - } - - // CHECK-LABEL: "op_remainder" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_remainder(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.remainder_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.remainder_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.remainder"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } -@@ -1692,16 +1820,18 @@ - } - - // CHECK-LABEL: "op_reshape" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reshape(%arg0: tensor<16xf32>) -> tensor<4x4xf32> { -- // CHECK: "vhlo.reshape_v1"(%arg0) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1> -+ // CHECK: "vhlo.reshape_v1"(%[[ARG0]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1> - %0 = "stablehlo.reshape"(%arg0) : (tensor<16xf32>) -> tensor<4x4xf32> - func.return %0 : tensor<4x4xf32> - } - - // CHECK-LABEL: "op_return" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_return(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.case_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.case"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> 
() -@@ -1710,8 +1840,9 @@ - } - - // CHECK-LABEL: "op_reverse" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reverse(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reverse_v1"(%arg0) <{ -+ // CHECK: "vhlo.reverse_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.reverse"(%arg0) { -@@ -1721,8 +1852,9 @@ - } - - // CHECK-LABEL: "op_rng_bit_generator" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_rng_bit_generator(%arg0: tensor) -> (tensor, tensor) { -- // CHECK: "vhlo.rng_bit_generator_v1"(%arg0) <{ -+ // CHECK: "vhlo.rng_bit_generator_v1"(%[[ARG0]]) <{ - // CHECK-SAME: rng_algorithm = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> (!vhlo.tensor_v1, !vhlo.tensor_v1) - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { -@@ -1732,8 +1864,9 @@ - } - - // CHECK-LABEL: "op_rng" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_rng(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { -- // CHECK: "vhlo.rng_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.rng_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: rng_distribution = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<0x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) { -@@ -1743,29 +1876,33 @@ - } - - // CHECK-LABEL: "op_round_nearest_afz" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_round_nearest_afz(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.round_nearest_afz_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.round_nearest_afz_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.round_nearest_afz"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_round_nearest_even" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_round_nearest_even(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.round_nearest_even_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.round_nearest_even_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.round_nearest_even"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_rsqrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_rsqrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.rsqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.rsqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.rsqrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> { -- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, - // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>, -@@ -1795,8 +1932,9 @@ - } - - // CHECK-LABEL: "op_select_and_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<12x13x13x66xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> { -- // CHECK: 
"vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>> -@@ -1826,15 +1964,17 @@ - } - - // CHECK-LABEL: "op_select" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_select(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.select_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.select_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.select"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_send" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -1847,8 +1987,9 @@ - } - - // CHECK-LABEL: "op_set_dimension_size" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_set_dimension_size(%arg0: tensor, %arg1: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.set_dimension_size_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.set_dimension_size_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.set_dimension_size"(%arg0, %arg1) { -@@ -1858,43 +1999,49 @@ - } - - // CHECK-LABEL: "op_shift_left" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_left(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.shift_left_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_left_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_left"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_shift_right_arithmetic" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_right_arithmetic(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.shift_right_arithmetic_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_right_arithmetic_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_right_arithmetic"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_shift_right_logical" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_right_logical(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.shift_right_logical_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_right_logical_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_right_logical"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_sign" -+// CHECK-NEXT: 
(%[[ARG0:.*]]: {{.*}}) - func.func @op_sign(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.sign_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.sign_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.sign"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_sine" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sine(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.sine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.sine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.sine"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_slice(%arg0: tensor<16xf32>) -> tensor<4xf32> { -- // CHECK: "vhlo.slice_v1"(%arg0) <{ -+ // CHECK: "vhlo.slice_v1"(%[[ARG0]]) <{ - // CHECK-SAME: limit_indices = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: start_indices = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: strides = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1908,8 +2055,9 @@ - } - - // CHECK-LABEL: "op_sort" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.sort_v1"(%arg0) <{ -+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: is_stable = #vhlo.bool_v1 - // CHECK-SAME: }> ({ -@@ -1929,29 +2077,33 @@ - } - - // CHECK-LABEL: "op_sqrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sqrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.sqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.sqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.sqrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_subtract" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_subtract(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.subtract_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.subtract_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.subtract"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_tanh" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_tanh(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.tanh_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.tanh_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.tanh"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_torch_index_select" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_torch_index_select(%arg0: tensor<5x1x5xf32>, %arg1: tensor<2xi32>) -> tensor<2x1x5xf32> { -- // CHECK: "vhlo.torch_index_select_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.torch_index_select_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_dims = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: dim = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<5x1x5x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.i32_v1>) -> !vhlo.tensor_v1<2x1x5x!vhlo.f32_v1> -@@ -1963,8 +2115,9 @@ - } - - // CHECK-LABEL: "op_trace" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_trace(%arg0: tensor) { -- // CHECK: "vhlo.trace_v1"(%arg0) <{ -+ // CHECK: "vhlo.trace_v1"(%[[ARG0]]) <{ - // CHECK-SAME: tag = #vhlo.string_v1<"foo"> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> () - "stablehlo.trace"(%arg0) { 
-@@ -1974,8 +2127,9 @@ - } - - // CHECK-LABEL: "op_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_transpose(%arg0: tensor<16x8xf32>) -> tensor<8x16xf32> { -- // CHECK: "vhlo.transpose_v1"(%arg0) <{ -+ // CHECK: "vhlo.transpose_v1"(%[[ARG0]]) <{ - // CHECK-SAME: permutation = #vhlo.tensor_v1 : tensor<2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x16x!vhlo.f32_v1> - %0 = "stablehlo.transpose"(%arg0) { -@@ -1985,8 +2139,9 @@ - } - - // CHECK-LABEL: "op_triangular_solve" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_triangular_solve(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.triangular_solve_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.triangular_solve_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: left_side = #vhlo.bool_v1, - // CHECK-SAME: lower = #vhlo.bool_v1, - // CHECK-SAME: transpose_a = #vhlo, -@@ -2002,15 +2157,17 @@ - } - - // CHECK-LABEL: "op_tuple" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_tuple(%arg0: tensor) -> tuple> { -- // CHECK: "vhlo.tuple_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1> -+ // CHECK: "vhlo.tuple_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1> - %0 = "stablehlo.tuple"(%arg0) : (tensor) -> tuple> - func.return %0 : tuple> - } - - // CHECK-LABEL: "op_unary_einsum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_unary_einsum(%arg0: tensor<8x16xf32>) -> tensor<8xf32> { -- // CHECK: "vhlo.unary_einsum_v1"(%arg0) <{ -+ // CHECK: "vhlo.unary_einsum_v1"(%[[ARG0]]) <{ - // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab->a"> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x!vhlo.f32_v1> - %0 = "stablehlo.unary_einsum"(%arg0) { -@@ -2020,22 +2177,25 @@ - } - - // CHECK-LABEL: "op_uniform_dequantize" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_uniform_dequantize(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.uniform_dequantize_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.uniform_dequantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.uniform_dequantize"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_uniform_quantize" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_uniform_quantize(%arg0: tensor) -> tensor> { -- // CHECK: "vhlo.uniform_quantize_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.uniform_quantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1> - %0 = "stablehlo.uniform_quantize"(%arg0) : (tensor) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "op_while" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_while(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.while_v1"(%arg0) ({ -+ // CHECK: "vhlo.while_v1"(%[[ARG0]]) ({ - // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1): - // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }, { -@@ -2053,8 +2213,9 @@ - } - - // CHECK-LABEL: "op_xor" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_xor(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.xor_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.xor_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.xor"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } -@@ -2062,169 +2223,193 @@ - // ============ TYPES ============ - - // 
CHECK-LABEL: "type_i1" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i1(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i4" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i4(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i8" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i8(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i32(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i64(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui4" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui4(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui8" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui8(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = 
"stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui32(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui64(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E4M3FN" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E4M3FN(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E5M2" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E5M2(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_bf16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_bf16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f32(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ 
// CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f64(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_complex_f32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_complex_f32(%arg0: tensor>, %arg1: tensor>) -> tensor> { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "type_complex_f64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_complex_f64(%arg0: tensor>, %arg1: tensor>) -> tensor> { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "type_dynamism_ranked" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_dynamism_ranked(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_quantization" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_quantization(%arg0: tensor>, %arg1: tensor>) -> tensor> { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor> - func.return %0 : tensor> - } - - // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>> - // CHECK-LABEL: "type_token_callee" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_token_callee(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.token_v1) -> () -+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> () - return %arg0 : !stablehlo.token - } - - // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>> - // CHECK-LABEL: "type_token_caller" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_token_caller(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.call_v1"(%arg0) <{callee = #vhlo.string_v1<"type_token_callee">} -+ // CHECK: "vhlo.call_v1"(%[[ARG0]]) <{callee = #vhlo.string_v1<"type_token_callee">} - // CHECK-SAME: (!vhlo.token_v1) -> !vhlo.token_v1 - %0 = func.call @type_token_callee(%arg0) : (!stablehlo.token) -> !stablehlo.token - return %0 : !stablehlo.token - } - - // CHECK-LABEL: "type_tuple" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_tuple(%arg0: tuple>) -> tuple { - %0 = 
"stablehlo.custom_call"(%arg0) { - call_target_name = "foo" -diff --ruN a/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.mlir b/stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.mlir ---- stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.mlir -+++ stablehlo/stablehlo/tests/vhlo/stablehlo_legalize_to_vhlo.mlir -@@ -13,6 +13,7 @@ - // ============ ATTRIBUTES ============ - - // CHECK-LABEL: "attr_comparison_direction_eq" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_eq(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -22,6 +23,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_ne" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_ne(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -31,6 +33,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_ge" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_ge(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -40,6 +43,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_gt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_gt(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -49,6 +53,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_le" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_le(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -58,6 +63,7 @@ - } - - // CHECK-LABEL: "attr_comparison_direction_lt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_direction_lt(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - // CHECK: comparison_direction = #vhlo -@@ -67,6 +73,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_notype" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_notype(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo -@@ -76,6 +83,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_float" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_float(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -86,6 +94,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_totalorder" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_totalorder(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -96,6 +105,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_signed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_comparison_type_signed(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -106,6 +116,7 @@ - } - - // CHECK-LABEL: "attr_comparison_type_unsigned" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func 
@attr_comparison_type_unsigned(%arg0: tensor, %arg1: tensor) -> tensor { - %0 = "stablehlo.compare"(%arg0, %arg1) { - comparison_direction = #stablehlo, -@@ -118,6 +129,7 @@ - // ConvDimensionNumbers aka #stablehlo.conv is covered below. - - // CHECK-LABEL: "attr_custom_call_api_version_unspecified" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_unspecified(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -128,6 +140,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_original" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_original(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -138,6 +151,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_status_returning" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_status_returning(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -148,6 +162,7 @@ - } - - // CHECK-LABEL: "attr_custom_call_api_version_status_returning_unified" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_custom_call_api_version_status_returning_unified(%arg0: tensor) -> tensor { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo", -@@ -166,6 +181,7 @@ - // DotDimensionNumbers aka #stablehlo.dot is covered below. - - // CHECK-LABEL: "attr_fft_type_fft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -176,6 +192,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_ifft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_ifft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -186,6 +203,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_rfft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_rfft(%arg0: tensor<16xf32>) -> tensor<9xcomplex> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -196,6 +214,7 @@ - } - - // CHECK-LABEL: "attr_fft_type_irfft" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_fft_type_irfft(%arg0: tensor<9xcomplex>) -> tensor<16xf32> { - %0 = "stablehlo.fft"(%arg0) { - // CHECK: fft_type = #vhlo -@@ -208,6 +227,7 @@ - // GatherDimensionNumbers aka #stablehlo.gather is covered below. 
- - // CHECK-LABEL: "attr_precision_config_default" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_default(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -216,6 +236,7 @@ - } - - // CHECK-LABEL: "attr_precision_config_high" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_high(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -225,6 +246,7 @@ - } - - // CHECK-LABEL: "attr_precision_config_highest" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_precision_config_highest(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { - %0 = "stablehlo.dot"(%arg0, %arg1) { - // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> -@@ -234,6 +256,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_default" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_default(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -243,6 +266,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_three_fry" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_three_fry(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -252,6 +276,7 @@ - } - - // CHECK-LABEL: "attr_rng_algorithm_philox" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @attr_rng_algorithm_philox(%arg0: tensor) -> (tensor, tensor) { - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { - // CHECK: rng_algorithm = #vhlo -@@ -261,6 +286,7 @@ - } - - // CHECK-LABEL: "attr_rng_distribution_uniform" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @attr_rng_distribution_uniform(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { - %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) { - // CHECK: rng_distribution = #vhlo -@@ -270,6 +296,7 @@ - } - - // CHECK-LABEL: "attr_rng_distribution_normal" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @attr_rng_distribution_normal(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { - %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) { - // CHECK: rng_distribution = #vhlo -@@ -281,6 +308,7 @@ - // ScatterDimensionNumbers aka #stablehlo.scatter is covered below. 
- - // CHECK-LABEL: "attr_transpose_no_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_no_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -293,6 +321,7 @@ - } - - // CHECK-LABEL: "attr_transpose_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -305,6 +334,7 @@ - } - - // CHECK-LABEL: "attr_transpose_adjoint" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @attr_transpose_adjoint(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { - %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { - left_side = true, -@@ -319,10 +349,9 @@ - // TypeExtensionsAttr aka #stablehlo.type_extensions is covered below. - - // CHECK-LABEL: "attr_type_extensions_bounds" --func.func @attr_type_extensions_bounds( -- %arg0: tensor>) -- -> tensor> { -- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1>) -> () -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) -+func.func @attr_type_extensions_bounds(%arg0: tensor>) -> tensor> { -+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> () - func.return %arg0 : tensor> - } - -@@ -330,8 +359,9 @@ - // ============ DEFAULTS ============ - - // CHECK-LABEL: "default_all_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.all_gather_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{ - // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64> - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -345,8 +375,9 @@ - } - - // CHECK-LABEL: "default_all_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_reduce(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.all_reduce_v1"(%arg0) -+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]]) - // CHECK-SAME: <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -368,8 +399,9 @@ - } - - // CHECK-LABEL: "default_all_to_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> { -- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>, -@@ -386,8 +418,9 @@ - } - - // CHECK-LABEL: "default_cholesky" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> { -- // CHECK: "vhlo.cholesky_v1"(%arg0) <{ -+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{ - // CHECK-SAME: lower = #vhlo.bool_v1 - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1> - %0 = "stablehlo.cholesky"(%arg0) : (tensor<1x16x16xf32>) -> tensor<1x16x16xf32> -@@ -395,8 +428,9 @@ - } - - // CHECK-LABEL: "default_collective_permute" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: 
"vhlo.collective_permute_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -407,8 +441,9 @@ - } - - // CHECK-LABEL: "default_collective_broadcast" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_collective_broadcast(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: "vhlo.collective_broadcast_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_broadcast_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -419,8 +454,9 @@ - } - - // CHECK-LABEL: "default_compare" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_compare(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: compare_type = #vhlo, - // CHECK-SAME: comparison_direction = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -431,8 +467,9 @@ - } - - // CHECK-LABEL: "default_composite" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_composite(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.composite_v1"(%arg0) <{ -+ // CHECK: "vhlo.composite_v1"(%[[ARG0]]) <{ - // CHECK-SAME: composite_attributes = #vhlo.dict_v1<{}> - // CHECK-SAME: decomposition = #vhlo.string_v1<"composite_target"> - // CHECK-SAME: name = #vhlo.string_v1<"stablehlo.composite_target"> -@@ -446,8 +483,9 @@ - } - - // CHECK-LABEL: "default_convolution" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x6x6x16xf32> { -- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -475,8 +513,9 @@ - } - - // CHECK-LABEL: "default_custom_call" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_custom_call(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.custom_call_v1"(%arg0) <{ -+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{ - // CHECK-SAME: api_version = #vhlo, - // CHECK-SAME: backend_config = #vhlo.string_v1<"">, - // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">, -@@ -493,8 +532,9 @@ - } - - // CHECK-LABEL: "default_dot_general" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> { -- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, -@@ -513,8 +553,9 @@ - } - - // CHECK-LABEL: "default_dot" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> 
tensor<8x8xf32> { -- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.dot"(%arg0, %arg1) : (tensor<8x16xf32>, tensor<16x8xf32>) -> tensor<8x8xf32> -@@ -522,8 +563,9 @@ - } - - // CHECK-LABEL: "default_dynamic_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>>, - // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>> -@@ -535,8 +577,9 @@ - } - - // CHECK-LABEL: "default_dynamic_conv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> { -- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -564,8 +607,9 @@ - } - - // CHECK-LABEL: "default_dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> { -- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -591,15 +635,16 @@ - // CHECK-SAME: sym_name = #vhlo.string_v1<"default_func">, - // CHECK-SAME: sym_visibility = #vhlo.string_v1<""> - // CHECK-SAME: }> ({ -- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1): -- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1): -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : () -> () - func.return %arg0 : tensor - } - - // CHECK-LABEL: "dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> { -- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -620,8 +665,9 @@ - } - - // CHECK-LABEL: "default_infeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.infeed_v1"(%arg0) <{ -+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{ - // CHECK-SAME: infeed_config = 
#vhlo.string_v1<"">, - // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[]> - // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) -@@ -630,8 +676,9 @@ - } - - // CHECK-LABEL: "default_outfeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: outfeed_config = #vhlo.string_v1<""> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.outfeed"(%arg0, %arg1) : (tensor, !stablehlo.token) -> !stablehlo.token -@@ -639,8 +686,9 @@ - } - - // CHECK-LABEL: "default_recv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.recv_v1"(%arg0) <{ -+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -652,8 +700,9 @@ - } - - // CHECK-LABEL: "default_send" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -665,8 +714,9 @@ - } - - // CHECK-LABEL: "default_reduce_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64> -@@ -688,8 +738,9 @@ - } - - // CHECK-LABEL: "default_reduce_window" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @default_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x16x30x7xf32> { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, -@@ -711,8 +762,9 @@ - } - - // CHECK-LABEL: "default_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> { -- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, - // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>, -@@ -740,8 +792,9 @@ - } - - // CHECK-LABEL: "default_select_and_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @default_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<10x23x23x64xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> { -- 
// CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>> -@@ -769,8 +822,9 @@ - } - - // CHECK-LABEL: "default_sort" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @default_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.sort_v1"(%arg0) <{ -+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<-1 : i64> - // CHECK-SAME: is_stable = #vhlo.bool_v1 - // CHECK-SAME: }> ({ -@@ -789,29 +843,33 @@ - // ============ OPS ============ - - // CHECK-LABEL: "op_abs" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_abs(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_add" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_add(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_after_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_after_all(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.after_all_v1"(%arg0) : (!vhlo.token_v1) -> !vhlo.token_v1 -+ // CHECK: "vhlo.after_all_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.after_all"(%arg0) : (!stablehlo.token) -> !stablehlo.token - func.return %0 : !stablehlo.token - } - - // CHECK-LABEL: "op_all_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.all_gather_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_gather_v1"(%[[ARG0]]) <{ - // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64> - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, -@@ -827,8 +885,9 @@ - } - - // CHECK-LABEL: "op_all_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_reduce(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.all_reduce_v1"(%arg0) <{ -+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: use_global_device_ids = #vhlo.bool_v1 -@@ -851,7 +910,7 @@ - - // CHECK-LABEL: "op_all_reduce_with_promotable_types" - func.func @op_all_reduce_with_promotable_types(%operand: tensor) -> tensor { -- // CHECK: "vhlo.all_reduce_v1"(%arg0) -+ // CHECK: "vhlo.all_reduce_v1"(%[[ARG0:.*]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -869,8 +928,9 @@ - } - - // CHECK-LABEL: "op_all_to_all" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> { -- // CHECK: "vhlo.all_to_all_v1"(%arg0) <{ -+ 
// CHECK: "vhlo.all_to_all_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>, -@@ -888,22 +948,25 @@ - } - - // CHECK-LABEL: "op_and" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_and(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_atan2" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_atan2(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.atan2_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.atan2_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.atan2"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_batch_norm_grad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_batch_norm_grad(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16x16x16x16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { -- // CHECK: "vhlo.batch_norm_grad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ -+ // CHECK: "vhlo.batch_norm_grad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -@@ -915,8 +978,9 @@ - } - - // CHECK-LABEL: "op_batch_norm_inference" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_batch_norm_inference(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16xf32>) -> tensor<16x16x16x16xf32> { -- // CHECK: "vhlo.batch_norm_inference_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ -+ // CHECK: "vhlo.batch_norm_inference_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1> -@@ -928,8 +992,9 @@ - } - - // CHECK-LABEL: "op_batch_norm_training" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_batch_norm_training(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { -- // CHECK: "vhlo.batch_norm_training_v1"(%arg0, %arg1, 
%arg2) <{ -+ // CHECK: "vhlo.batch_norm_training_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, - // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -@@ -941,15 +1006,17 @@ - } - - // CHECK-LABEL: "op_bitcast_convert" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_bitcast_convert(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.bitcast_convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.bitcast_convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.bitcast_convert"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_broadcast_in_dim(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.broadcast_in_dim_v1"(%arg0) <{ -+ // CHECK: "vhlo.broadcast_in_dim_v1"(%[[ARG0]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1> - %0 = "stablehlo.broadcast_in_dim"(%arg0) { -@@ -959,8 +1026,9 @@ - } - - // CHECK-LABEL: "op_broadcast" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_broadcast(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.broadcast_v1"(%arg0) <{ -+ // CHECK: "vhlo.broadcast_v1"(%[[ARG0]]) <{ - // CHECK-SAME: broadcast_sizes = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1> - %0 = "stablehlo.broadcast"(%arg0) { -@@ -970,9 +1038,10 @@ - } - - // CHECK-LABEL: "op_case" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_case(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.case_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.case"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -981,22 +1050,25 @@ - } - - // CHECK-LABEL: "op_cbrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cbrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cbrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.cbrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cbrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_ceil" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_ceil(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.ceil_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.ceil_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.ceil"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_cholesky" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> { -- // CHECK: "vhlo.cholesky_v1"(%arg0) <{ -+ // CHECK: "vhlo.cholesky_v1"(%[[ARG0]]) <{ - // CHECK-SAME: lower = #vhlo.bool_v1 - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1> - %0 = "stablehlo.cholesky"(%arg0) { -@@ -1006,22 
+1078,25 @@ - } - - // CHECK-LABEL: "op_clamp" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_clamp(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.clamp_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.clamp_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.clamp"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_count_leading_zeros" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_count_leading_zeros(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.count_leading_zeros_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.count_leading_zeros_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.count_leading_zeros"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_collective_permute" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { -- // CHECK: "vhlo.collective_permute_v1"(%arg0) <{ -+ // CHECK: "vhlo.collective_permute_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> -@@ -1033,8 +1108,9 @@ - } - - // CHECK-LABEL: "op_compare" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_compare(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.compare_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: compare_type = #vhlo, - // CHECK-SAME: comparison_direction = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -1046,15 +1122,17 @@ - } - - // CHECK-LABEL: "op_complex" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_complex(%arg0: tensor, %arg1: tensor) -> tensor> { -- // CHECK: "vhlo.complex_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.complex_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1> - %0 = "stablehlo.complex"(%arg0, %arg1) : (tensor, tensor) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "op_composite" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_composite(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.composite_v1"(%arg0) <{ -+ // CHECK: "vhlo.composite_v1"(%[[ARG0]]) <{ - // CHECK-SAME: composite_attributes = #vhlo.dict_v1<{#vhlo.string_v1<"my_int"> = #vhlo.integer_v1<1 : i64>, #vhlo.string_v1<"my_string"> = #vhlo.string_v1<"foo">}> - // CHECK-SAME: decomposition = #vhlo.string_v1<"composite_target"> - // CHECK-SAME: name = #vhlo.string_v1<"stablehlo.composite_target"> -@@ -1073,8 +1151,9 @@ - } - - // CHECK-LABEL: "op_concatenate" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_concatenate(%arg0: tensor<8xf32>, %arg1: tensor<8xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.concatenate_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.concatenate_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x!vhlo.f32_v1>, !vhlo.tensor_v1<8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.concatenate"(%arg0, %arg1) 
{ -@@ -1084,6 +1163,7 @@ - } - - // CHECK-LABEL: "op_constant" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_constant(%arg0: tensor) -> tensor { - // CHECK: "vhlo.constant_v1"() <{ - // CHECK-SAME: value = #vhlo.tensor_v1 : tensor> -@@ -1095,15 +1175,17 @@ - } - - // CHECK-LABEL: "op_convert" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_convert(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.convert_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.convert"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_convolution" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x7x7x16xf32> { -- // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.convolution_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -1137,8 +1219,9 @@ - } - - // CHECK-LABEL: "op_cosine" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cosine(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cosine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.cosine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cosine"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } -@@ -1151,8 +1234,9 @@ - } - - // CHECK-LABEL: "op_cross_replica_sum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_cross_replica_sum(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.cross-replica-sum_v1"(%arg0) <{ -+ // CHECK: "vhlo.cross-replica-sum_v1"(%[[ARG0]]) <{ - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.cross-replica-sum"(%arg0) { -@@ -1162,8 +1246,9 @@ - } - - // CHECK-LABEL: "op_custom_call" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_custom_call(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.custom_call_v1"(%arg0) <{ -+ // CHECK: "vhlo.custom_call_v1"(%[[ARG0]]) <{ - // CHECK-SAME: api_version = #vhlo, - // CHECK-SAME: backend_config = #vhlo.string_v1<"\08\03\1A\02">, - // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">, -@@ -1194,15 +1279,17 @@ - } - - // CHECK-LABEL: "op_divide" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_divide(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.divide_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.divide_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.divide"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dot_general" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> { -- // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_general_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, -@@ -1222,8 +1309,9 @@ - } - - // CHECK-LABEL: "op_dot" -+// CHECK-NEXT: (%[[ARG0:.*]]: 
{{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dot_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.dot"(%arg0, %arg1) { -@@ -1233,8 +1321,9 @@ - } - - // CHECK-LABEL: "op_dynamic_broadcast_in_dim" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1248,8 +1337,9 @@ - } - - // CHECK-LABEL: "op_dynamic_conv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> { -- // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_conv_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, -@@ -1283,8 +1373,9 @@ - } - - // CHECK-LABEL: "op_dynamic_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> { -- // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.dynamic_gather_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -1304,8 +1395,9 @@ - } - - // CHECK-LABEL: "op_dynamic_iota" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_dynamic_iota(%arg0: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_iota_v1"(%arg0) <{ -+ // CHECK: "vhlo.dynamic_iota_v1"(%[[ARG0]]) <{ - // CHECK-SAME: iota_dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_iota"(%arg0) { -@@ -1315,22 +1407,25 @@ - } - - // CHECK-LABEL: "op_dynamic_pad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}, %[[ARG4:.*]]: {{.*}}) - func.func @op_dynamic_pad(%arg0: tensor, %arg1: tensor, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>, %arg4: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_pad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.dynamic_pad_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]], %[[ARG4]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = 
"stablehlo.dynamic_pad"(%arg0, %arg1, %arg2, %arg3, %arg4) : (tensor, tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dynamic_reshape" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_reshape(%arg0: tensor<16xf32>, %arg1: tensor<2xindex>) -> tensor { -- // CHECK: "vhlo.dynamic_reshape_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.dynamic_reshape_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.dynamic_reshape"(%arg0, %arg1) : (tensor<16xf32>, tensor<2xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_dynamic_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_dynamic_slice(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor<4xf32> { -- // CHECK: "vhlo.dynamic_slice_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.dynamic_slice_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: slice_sizes = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<4x!vhlo.f32_v1> - %0 = "stablehlo.dynamic_slice"(%arg0, %arg1) { -@@ -1340,15 +1435,17 @@ - } - - // CHECK-LABEL: "op_dynamic_update_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_dynamic_update_slice(%arg0: tensor<16xf32>, %arg1: tensor<4xf32>, %arg2: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.dynamic_update_slice_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> -+ // CHECK: "vhlo.dynamic_update_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.dynamic_update_slice"(%arg0, %arg1, %arg2) : (tensor<16xf32>, tensor<4xf32>, tensor) -> tensor<16xf32> - func.return %0 : tensor<16xf32> - } - - // CHECK-LABEL: "op_einsum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_einsum(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { -- // CHECK: "vhlo.einsum_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.einsum_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab,bc->ac"> - // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> - %0 = "stablehlo.einsum"(%arg0, %arg1) { -@@ -1358,22 +1455,25 @@ - } - - // CHECK-LABEL: "op_exponential_minus_one" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_exponential_minus_one(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.exponential_minus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.exponential_minus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.exponential_minus_one"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_exponential" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_exponential(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.exponential_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.exponential_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.exponential"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_fft" -+// 
CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { -- // CHECK: "vhlo.fft_v1"(%arg0) <{ -+ // CHECK: "vhlo.fft_v1"(%[[ARG0]]) <{ - // CHECK-SAME: fft_length = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: fft_type = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.complex_v1>) -> !vhlo.tensor_v1<16x!vhlo.complex_v1> -@@ -1385,8 +1485,9 @@ - } - - // CHECK-LABEL: "op_floor" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_floor(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.floor_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.floor_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.floor"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } -@@ -1399,16 +1500,17 @@ - // CHECK-SAME: sym_name = #vhlo.string_v1<"op_func">, - // CHECK-SAME: sym_visibility = #vhlo.string_v1<"private"> - // CHECK-SAME: }> ({ -- // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1): -- // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG0:.*]]: !vhlo.tensor_v1): -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : () -> () - - func.return %arg0 : tensor - } - - // CHECK-LABEL: "op_gather" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> { -- // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.gather_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, - // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, -@@ -1430,8 +1532,9 @@ - } - - // CHECK-LABEL: "op_get_dimension_size" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_get_dimension_size(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.get_dimension_size_v1"(%arg0) <{ -+ // CHECK: "vhlo.get_dimension_size_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.get_dimension_size"(%arg0) { -@@ -1441,8 +1544,9 @@ - } - - // CHECK-LABEL: "op_get_tuple_element" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_get_tuple_element(%arg0: tuple, tensor>) -> tensor { -- // CHECK: "vhlo.get_tuple_element_v1"(%arg0) <{ -+ // CHECK: "vhlo.get_tuple_element_v1"(%[[ARG0]]) <{ - // CHECK-SAME: index = #vhlo.integer_v1<0 : i32> - // CHECK-SAME: }> : (!vhlo.tuple_v1, !vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.get_tuple_element"(%arg0) { -@@ -1452,11 +1556,12 @@ - } - - // CHECK-LABEL: "op_if" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_if(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.if_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.if_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }, { -- // CHECK-NEXT: "vhlo.return_v1"(%arg2) : (!vhlo.tensor_v1) -> () -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG2]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.if"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -1467,15 +1572,17 @@ - } - - // CHECK-LABEL: "op_imag" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_imag(%arg0: tensor>) -> tensor { -- // CHECK: 
"vhlo.imag_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.imag_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.imag"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_infeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.infeed_v1"(%arg0) <{ -+ // CHECK: "vhlo.infeed_v1"(%[[ARG0]]) <{ - // CHECK-SAME: infeed_config = #vhlo.string_v1<"foo">, - // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[#vhlo.array_v1<[]>]> - // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) -@@ -1498,36 +1605,41 @@ - } - - // CHECK-LABEL: "op_is_finite" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_is_finite(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.is_finite_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.is_finite_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.is_finite"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_log" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_log(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.log_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.log_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.log"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_log_plus_one" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_log_plus_one(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.log_plus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.log_plus_one_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.log_plus_one"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_logistic" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_logistic(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.logistic_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.logistic_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.logistic"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_map" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_map(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.map_v1"(%arg0) <{ -+ // CHECK: "vhlo.map_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> ({ - // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1): -@@ -1545,57 +1657,65 @@ - } - - // CHECK-LABEL: "op_maximum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_maximum(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.maximum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.maximum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.maximum"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_minimum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_minimum(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.minimum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.minimum_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.minimum"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_multiply" -+// 
CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_multiply(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.multiply_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.multiply_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.multiply"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_negate" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_negate(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.negate_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.negate_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.negate"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_not" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_not(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.not_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.not_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.not"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_optimization_barrier" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_optimization_barrier(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.optimization_barrier_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.optimization_barrier_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.optimization_barrier"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_or" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_or(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.or_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.or_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.or"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_outfeed" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.outfeed_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: outfeed_config = #vhlo.string_v1<"foo"> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1 - %0 = "stablehlo.outfeed"(%arg0, %arg1) { -@@ -1605,8 +1725,9 @@ - } - - // CHECK-LABEL: "op_pad" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_pad(%arg0: tensor<8xf32>, %arg1: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.pad_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.pad_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: edge_padding_high = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: edge_padding_low = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: interior_padding = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -1620,36 +1741,41 @@ - } - - // CHECK-LABEL: "op_popcnt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_popcnt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.popcnt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.popcnt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.popcnt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_power" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_power(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: 
"vhlo.power_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.power_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.power"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_real_dynamic_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}, %[[ARG3:.*]]: {{.*}}) - func.func @op_real_dynamic_slice(%arg0: tensor, %arg1: tensor<1xindex>, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>) -> tensor { -- // CHECK: "vhlo.real_dynamic_slice_v1"(%arg0, %arg1, %arg2, %arg3) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.real_dynamic_slice_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.real_dynamic_slice"(%arg0, %arg1, %arg2, %arg3) : (tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_real" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_real(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.real_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.real_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.real"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_recv" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { -- // CHECK: "vhlo.recv_v1"(%arg0) <{ -+ // CHECK: "vhlo.recv_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<3 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -1662,8 +1788,9 @@ - } - - // CHECK-LABEL: "op_reduce" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_reduce(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.reduce_v1"(%arg0, %arg1) -+ // CHECK: "vhlo.reduce_v1"(%[[ARG0]], %[[ARG1]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -1678,8 +1805,9 @@ - } - - // CHECK-LABEL: "op_reduce_precision" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reduce_precision(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.reduce_precision_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_precision_v1"(%[[ARG0]]) <{ - // CHECK-SAME: exponent_bits = #vhlo.integer_v1<8 : i32> - // CHECK-SAME: mantissa_bits = #vhlo.integer_v1<10 : i32> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -@@ -1693,7 +1821,7 @@ - // CHECK_lABEL: "op_reduce_with_promotable_types" - func.func @op_reduce_with_promotable_types(%arg0: tensor<4x4xf32>, %arg1 : tensor) - -> (tensor<4xf64>) { -- // CHECK: "vhlo.reduce_v1"(%arg0, %arg1) -+ // CHECK: "vhlo.reduce_v1"(%[[ARG0:.*]], %[[ARG1:.*]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1<4x4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<4x!vhlo.f64_v1> -@@ -1708,8 +1836,9 @@ - } - - // CHECK-LABEL: "op_reduce_scatter" -+// 
CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{ -+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, - // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, - // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64> -@@ -1734,7 +1863,7 @@ - - // CHECK_lABEL: "op_reduce_scatter_with_promotable_types" - func.func @op_reduce_scatter_with_promotable_types(%data: tensor<4x16xf32>) -> tensor<4x4xf64> { -- // CHECK: "vhlo.reduce_scatter_v1"(%arg0) -+ // CHECK: "vhlo.reduce_scatter_v1"(%[[ARG0:.*]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1<4x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f64_v1> -@@ -1751,8 +1880,9 @@ - - - // CHECK-LABEL: "op_reduce_window" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x9x16x7xf32> { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, -@@ -1777,11 +1907,11 @@ - func.return %0 : tensor<2x9x16x7xf32> - } - --// CHECK_lABEL: "op_reduce_window_with_promotable_types" -+// CHECK-LABEL: "op_reduce_window_with_promotable_types" - func.func @op_reduce_window_with_promotable_types(%arg0: tensor<4x2xf32>, - %arg1: tensor<4x2xf32>, %init0: tensor, %init1: tensor) -> - (tensor<2x2xf64>, tensor<2x2xf32>) { -- // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1, %arg2, %arg3) -+ // CHECK: "vhlo.reduce_window_v1"(%[[ARG0:.*]], %[[ARG1:.*]], %[[ARG2:.*]], %[[ARG3:.*]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1, %[[ARG3:arg.*]]: !vhlo.tensor_v1, %[[ARG4:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]], %[[VAL2:.*]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1<4x2x!vhlo.f32_v1>, !vhlo.tensor_v1<4x2x!vhlo.f32_v1>, !vhlo.tensor_v1, !vhlo.tensor_v1) -> (!vhlo.tensor_v1<2x2x!vhlo.f64_v1>, !vhlo.tensor_v1<2x2x!vhlo.f32_v1>) -@@ -1801,8 +1931,9 @@ - } - - // CHECK-LABEL: "op_remainder" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_remainder(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.remainder_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.remainder_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.remainder"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } -@@ -1822,16 +1953,18 @@ - } - - // CHECK-LABEL: "op_reshape" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reshape(%arg0: tensor<16xf32>) -> tensor<4x4xf32> { -- // CHECK: "vhlo.reshape_v1"(%arg0) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1> -+ // CHECK: "vhlo.reshape_v1"(%[[ARG0]]) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1> - %0 = "stablehlo.reshape"(%arg0) : (tensor<16xf32>) -> tensor<4x4xf32> - func.return %0 : tensor<4x4xf32> - } - - // CHECK-LABEL: "op_return" -+// CHECK-NEXT: 
(%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_return(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.case_v1"(%arg0) ({ -- // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () -+ // CHECK: "vhlo.case_v1"(%[[ARG0]]) ({ -+ // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.case"(%arg0) ({ - "stablehlo.return"(%arg1) : (tensor) -> () -@@ -1840,8 +1973,9 @@ - } - - // CHECK-LABEL: "op_reverse" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_reverse(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.reverse_v1"(%arg0) <{ -+ // CHECK: "vhlo.reverse_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.reverse"(%arg0) { -@@ -1851,8 +1985,9 @@ - } - - // CHECK-LABEL: "op_rng_bit_generator" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_rng_bit_generator(%arg0: tensor) -> (tensor, tensor) { -- // CHECK: "vhlo.rng_bit_generator_v1"(%arg0) <{ -+ // CHECK: "vhlo.rng_bit_generator_v1"(%[[ARG0]]) <{ - // CHECK-SAME: rng_algorithm = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> (!vhlo.tensor_v1, !vhlo.tensor_v1) - %0:2 = "stablehlo.rng_bit_generator"(%arg0) { -@@ -1862,8 +1997,9 @@ - } - - // CHECK-LABEL: "op_rng" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_rng(%arg0: tensor, %arg1: tensor, %arg2: tensor<0xindex>) -> tensor { -- // CHECK: "vhlo.rng_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.rng_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: rng_distribution = #vhlo - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<0x!vhlo.index_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) { -@@ -1873,29 +2009,33 @@ - } - - // CHECK-LABEL: "op_round_nearest_afz" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_round_nearest_afz(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.round_nearest_afz_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.round_nearest_afz_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.round_nearest_afz"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_round_nearest_even" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_round_nearest_even(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.round_nearest_even_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.round_nearest_even_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.round_nearest_even"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_rsqrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_rsqrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.rsqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.rsqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.rsqrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> { -- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: index_vector_dim = 
#vhlo.integer_v1<1 : i64>, - // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, - // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>, -@@ -1928,7 +2068,7 @@ - func.func @op_scatter_with_promotable_types(%input_tensor: tensor<200x100x300xf32>, - %scatter_indices: tensor<10x2xi32>, %updates: tensor<10x300xf32>) -> - tensor<200x100x300xf64> { -- // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) -+ // CHECK: "vhlo.scatter_v1"(%[[ARG0:.*]], %[[ARG1:.*]], %[[ARG2:.*]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () - // CHECK: }) : (!vhlo.tensor_v1<200x100x300x!vhlo.f32_v1>, !vhlo.tensor_v1<10x2x!vhlo.i32_v1>, !vhlo.tensor_v1<10x300x!vhlo.f32_v1>) -> !vhlo.tensor_v1<200x100x300x!vhlo.f64_v1> -@@ -1951,8 +2091,9 @@ - } - - // CHECK-LABEL: "op_select_and_scatter" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<12x13x13x66xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> { -- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{ -+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) <{ - // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, - // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>, - // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>> -@@ -1982,8 +2123,9 @@ - } - - // CHECK-LABEL: "op_select_and_scatter_with_promotable_types" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_select_and_scatter_with_promotable_types(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<12x13x13x66xf32>, %arg2: tensor) -> tensor<10x24x24x64xf64> { -- // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) -+ // CHECK: "vhlo.select_and_scatter_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) - // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): - // CHECK: %[[VAL:.*]] = "vhlo.add_v1"(%[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - // CHECK: "vhlo.return_v1"(%[[VAL]]) : (!vhlo.tensor_v1) -> () -@@ -2005,15 +2147,17 @@ - } - - // CHECK-LABEL: "op_select" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}) - func.func @op_select(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { -- // CHECK: "vhlo.select_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.select_v1"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.select"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_send" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.send_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, - // CHECK-SAME: channel_type = #vhlo.integer_v1<2 : i64>, - // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 -@@ -2026,8 +2170,9 @@ - } - - // CHECK-LABEL: "op_set_dimension_size" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_set_dimension_size(%arg0: tensor, %arg1: tensor) -> tensor<16xf32> { -- // CHECK: "vhlo.set_dimension_size_v1"(%arg0, %arg1) <{ -+ // 
CHECK: "vhlo.set_dimension_size_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> - %0 = "stablehlo.set_dimension_size"(%arg0, %arg1) { -@@ -2037,43 +2182,49 @@ - } - - // CHECK-LABEL: "op_shift_left" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_left(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.shift_left_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_left_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_left"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_shift_right_arithmetic" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_right_arithmetic(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.shift_right_arithmetic_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_right_arithmetic_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_right_arithmetic"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_shift_right_logical" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_shift_right_logical(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.shift_right_logical_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.shift_right_logical_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.shift_right_logical"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_sign" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sign(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.sign_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.sign_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.sign"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_sine" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sine(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.sine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.sine_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.sine"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_slice" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_slice(%arg0: tensor<16xf32>) -> tensor<4xf32> { -- // CHECK: "vhlo.slice_v1"(%arg0) <{ -+ // CHECK: "vhlo.slice_v1"(%[[ARG0]]) <{ - // CHECK-SAME: limit_indices = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: start_indices = #vhlo.tensor_v1 : tensor<1xi64>>, - // CHECK-SAME: strides = #vhlo.tensor_v1 : tensor<1xi64>> -@@ -2087,8 +2238,9 @@ - } - - // CHECK-LABEL: "op_sort" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> { -- // CHECK: "vhlo.sort_v1"(%arg0) <{ -+ // CHECK: "vhlo.sort_v1"(%[[ARG0]]) <{ - // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: is_stable = #vhlo.bool_v1 - // CHECK-SAME: }> ({ -@@ -2108,29 +2260,33 @@ - } - - // CHECK-LABEL: "op_sqrt" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_sqrt(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.sqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 
-+ // CHECK: "vhlo.sqrt_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.sqrt"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_subtract" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_subtract(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.subtract_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.subtract_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.subtract"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_tanh" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_tanh(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.tanh_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.tanh_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.tanh"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_torch_index_select" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_torch_index_select(%arg0: tensor<5x1x5xf32>, %arg1: tensor<2xi32>) -> tensor<2x1x5xf32> { -- // CHECK: "vhlo.torch_index_select_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.torch_index_select_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: batch_dims = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: dim = #vhlo.integer_v1<0 : i64> - // CHECK-SAME: }> : (!vhlo.tensor_v1<5x1x5x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.i32_v1>) -> !vhlo.tensor_v1<2x1x5x!vhlo.f32_v1> -@@ -2142,8 +2298,9 @@ - } - - // CHECK-LABEL: "op_trace" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_trace(%arg0: tensor) { -- // CHECK: "vhlo.trace_v1"(%arg0) <{ -+ // CHECK: "vhlo.trace_v1"(%[[ARG0]]) <{ - // CHECK-SAME: tag = #vhlo.string_v1<"foo"> - // CHECK-SAME: }> : (!vhlo.tensor_v1) -> () - "stablehlo.trace"(%arg0) { -@@ -2153,8 +2310,9 @@ - } - - // CHECK-LABEL: "op_transpose" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_transpose(%arg0: tensor<16x8xf32>) -> tensor<8x16xf32> { -- // CHECK: "vhlo.transpose_v1"(%arg0) <{ -+ // CHECK: "vhlo.transpose_v1"(%[[ARG0]]) <{ - // CHECK-SAME: permutation = #vhlo.tensor_v1 : tensor<2xi64>> - // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x16x!vhlo.f32_v1> - %0 = "stablehlo.transpose"(%arg0) { -@@ -2164,8 +2322,9 @@ - } - - // CHECK-LABEL: "op_triangular_solve" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_triangular_solve(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { -- // CHECK: "vhlo.triangular_solve_v1"(%arg0, %arg1) <{ -+ // CHECK: "vhlo.triangular_solve_v1"(%[[ARG0]], %[[ARG1]]) <{ - // CHECK-SAME: left_side = #vhlo.bool_v1, - // CHECK-SAME: lower = #vhlo.bool_v1, - // CHECK-SAME: transpose_a = #vhlo, -@@ -2181,15 +2340,17 @@ - } - - // CHECK-LABEL: "op_tuple" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_tuple(%arg0: tensor) -> tuple> { -- // CHECK: "vhlo.tuple_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1> -+ // CHECK: "vhlo.tuple_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1> - %0 = "stablehlo.tuple"(%arg0) : (tensor) -> tuple> - func.return %0 : tuple> - } - - // CHECK-LABEL: "op_unary_einsum" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_unary_einsum(%arg0: tensor<8x16xf32>) -> tensor<8xf32> { -- // CHECK: "vhlo.unary_einsum_v1"(%arg0) <{ -+ // CHECK: "vhlo.unary_einsum_v1"(%[[ARG0]]) <{ - // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab->a"> - // 
CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x!vhlo.f32_v1> - %0 = "stablehlo.unary_einsum"(%arg0) { -@@ -2199,22 +2360,25 @@ - } - - // CHECK-LABEL: "op_uniform_dequantize" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_uniform_dequantize(%arg0: tensor>) -> tensor { -- // CHECK: "vhlo.uniform_dequantize_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.uniform_dequantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 - %0 = "stablehlo.uniform_dequantize"(%arg0) : (tensor>) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "op_uniform_quantize" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_uniform_quantize(%arg0: tensor) -> tensor> { -- // CHECK: "vhlo.uniform_quantize_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.uniform_quantize_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1> - %0 = "stablehlo.uniform_quantize"(%arg0) : (tensor) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "op_while" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @op_while(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.while_v1"(%arg0) ({ -+ // CHECK: "vhlo.while_v1"(%[[ARG0]]) ({ - // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1): - // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () - // CHECK-NEXT: }, { -@@ -2232,8 +2396,9 @@ - } - - // CHECK-LABEL: "op_xor" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @op_xor(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.xor_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.xor_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.xor"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } -@@ -2241,197 +2406,225 @@ - // ============ TYPES ============ - - // CHECK-LABEL: "type_i1" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i1(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.and_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i4" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i4(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i8" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i8(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], 
%[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i32(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_i64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_i64(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui4" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui4(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui8" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui8(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui32(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_ui64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_ui64(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E4M3FN" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E4M3FN(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, 
%arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E5M2" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E5M2(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E4M3FNUZ" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E4M3FNUZ(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E4M3B11FNUZ" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E4M3B11FNUZ(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f8E5M2FNUZ" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f8E5M2FNUZ(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_bf16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_bf16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f16" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f16(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f32(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_f64" -+// CHECK-NEXT: 
(%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_f64(%arg0: tensor, %arg1: tensor) -> tensor { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_complex_f32" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_complex_f32(%arg0: tensor>, %arg1: tensor>) -> tensor> { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "type_complex_f64" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_complex_f64(%arg0: tensor>, %arg1: tensor>) -> tensor> { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "type_dynamism_ranked" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_dynamism_ranked(%arg0: tensor) -> tensor { -- // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 -+ // CHECK: "vhlo.abs_v1"(%[[ARG0]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 - %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor - func.return %0 : tensor - } - - // CHECK-LABEL: "type_per_tensor_quantization" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}) - func.func @type_per_tensor_quantization(%arg0: tensor>, %arg1: tensor>) -> tensor> { -- // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG1]]) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> - %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor> - func.return %0 : tensor> - } - - // CHECK-LABEL: "type_per_axis_quantization" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_per_axis_quantization(%arg0: tensor<2x!quant.uniform>) -> tensor<2x!quant.uniform> { -- // CHECK: "vhlo.add_v1"(%arg0, %arg0) : (!vhlo.tensor_v1<2x!vhlo.quant_per_axis_v1>, !vhlo.tensor_v1<2x!vhlo.quant_per_axis_v1>) -> !vhlo.tensor_v1<2x!vhlo.quant_per_axis_v1> -+ // CHECK: "vhlo.add_v1"(%[[ARG0]], %[[ARG0]]) : (!vhlo.tensor_v1<2x!vhlo.quant_per_axis_v1>, !vhlo.tensor_v1<2x!vhlo.quant_per_axis_v1>) -> !vhlo.tensor_v1<2x!vhlo.quant_per_axis_v1> - %0 = stablehlo.add %arg0, %arg0 : tensor<2x!quant.uniform> - func.return %0 : tensor<2x!quant.uniform> - } - - // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>> - // CHECK-LABEL: "type_token_callee" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_token_callee(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.token_v1) -> () -+ // CHECK: "vhlo.return_v1"(%[[ARG0]]) : (!vhlo.token_v1) -> () - return %arg0 : !stablehlo.token - } - - // CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>> - // CHECK-LABEL: "type_token_caller" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_token_caller(%arg0: !stablehlo.token) -> !stablehlo.token { -- // CHECK: 
"vhlo.call_v1"(%arg0) <{callee = #vhlo.string_v1<"type_token_callee">} -+ // CHECK: "vhlo.call_v1"(%[[ARG0]]) <{callee = #vhlo.string_v1<"type_token_callee">} - // CHECK-SAME: (!vhlo.token_v1) -> !vhlo.token_v1 - %0 = func.call @type_token_callee(%arg0) : (!stablehlo.token) -> !stablehlo.token - return %0 : !stablehlo.token - } - - // CHECK-LABEL: "type_tuple" -+// CHECK-NEXT: (%[[ARG0:.*]]: {{.*}}) - func.func @type_tuple(%arg0: tuple>) -> tuple { - %0 = "stablehlo.custom_call"(%arg0) { - call_target_name = "foo" diff --git a/third_party/xla/third_party/stablehlo/workspace.bzl b/third_party/xla/third_party/stablehlo/workspace.bzl index 6a72c8fa16885c..aaef166d96583c 100644 --- a/third_party/xla/third_party/stablehlo/workspace.bzl +++ b/third_party/xla/third_party/stablehlo/workspace.bzl @@ -4,8 +4,8 @@ load("//third_party:repo.bzl", "tf_http_archive", "tf_mirror_urls") def repo(): # LINT.IfChange - STABLEHLO_COMMIT = "8ba7728d3fdc3ea882e893ee7e53255c95ee0e5a" - STABLEHLO_SHA256 = "1dfc7179dc9200c3ab4ea85edbac4a35393866d8cd8694fcaac00c1d27036408" + STABLEHLO_COMMIT = "797bee217e1a041e9aac22cad4db207274596d94" + STABLEHLO_SHA256 = "e5619033e131ea2eeb9eab8c8e362f3ba12e111c6b4a15dac789ca216ff22c58" # LINT.ThenChange(Google-internal path) tf_http_archive( diff --git a/third_party/xla/xla/mlir_hlo/mhlo/IR/hlo_base.td b/third_party/xla/xla/mlir_hlo/mhlo/IR/hlo_base.td index 15d8dcc8cf5f5f..992aca563edf97 100644 --- a/third_party/xla/xla/mlir_hlo/mhlo/IR/hlo_base.td +++ b/third_party/xla/xla/mlir_hlo/mhlo/IR/hlo_base.td @@ -131,4 +131,6 @@ defvar MHLO_StaticShapeIntOrFpTensor = HLO_StaticShapeIntOrFpTensor; defvar MHLO_StaticShapeIntFpOrComplexTensor = HLO_StaticShapeIntFpOrComplexTensor; +defvar MHLO_Static1DIntTensor = HLO_Static1DIntTensor; + #endif // MLIR_HLO_DIALECT_MHLO_IR_HLO_BASE diff --git a/third_party/xla/xla/mlir_hlo/mhlo/IR/hlo_ops.td b/third_party/xla/xla/mlir_hlo/mhlo/IR/hlo_ops.td index 75db5312d336c1..cfba735634a366 100644 --- a/third_party/xla/xla/mlir_hlo/mhlo/IR/hlo_ops.td +++ b/third_party/xla/xla/mlir_hlo/mhlo/IR/hlo_ops.td @@ -120,12 +120,12 @@ def MHLO_IotaOp : MHLO_Op<"iota", [Pure]> { def MHLO_DynamicIotaOp: MHLO_ShapedInterfaceOp<"dynamic_iota", [Pure]> { let summary = "DynamicIota operation"; let description = [{ - This operation is a work in progress, so it is not yet included in - the specification: https://github.com/openxla/stablehlo/issues/8. + This operation is functionally identical to + [iota](https://github.com/openxla/stablehlo/blob/main/docs/spec.md#iota) + op, but the result shape is specified dynamically via `output_shape`. - Informally, this operation does the same thing as IotaOp except that the - result shape is specified dynamically via `output_shape`: - https://github.com/openxla/stablehlo/blob/main/docs/spec.md#iota + See: + https://github.com/openxla/stablehlo/blob/main/docs/spec.md#dynamic_iota Example: ```mlir @@ -2126,19 +2126,30 @@ def MHLO_DynamicBroadcastInDimOp : MHLO_ShapedInterfaceOp< "dynamic_broadcast_in_dim", [Pure]> { let summary = "DynamicBroadcastInDim operation"; let description = [{ - This operation is a work in progress, so it is not yet included in - the specification: https://github.com/openxla/stablehlo/issues/8. 
- - Informally, this operation does the same thing as BroadcastInDimOp except - that the result shape is specified dynamically via `output_dimensions`: - https://github.com/openxla/stablehlo/blob/main/docs/spec.md#broadcast_in_dim + This operation is functionally identical to + [broadcast_in_dim](https://github.com/openxla/stablehlo/blob/main/docs/spec.md#broadcast_in_dim) + op, but the result shape is specified dynamically via `output_dimensions`. It also accepts optional attributes to express static knowledge about the expanding behavior of dimensions. If not specified, all dimensions are assumed to be possibly expanding. The sets of dimensions that are known to be expanding and the set of dimensions that are known to be non-expanding must be disjoint and they must be a subset of the operand's dimensions. + + See: https://github.com/openxla/stablehlo/blob/main/docs/spec.md#dynamic_broadcast_in_dim + + Example: + ```mlir + %operand = mhlo.constant dense<[[1, 2, 3]]> : tensor<1x3xi64> + %output_dimensions = mhlo.constant dense<[2, 3, 2]> : tensor<3xi64> + %result = "mhlo.dynamic_broadcast_in_dim"(%operand, %output_dimensions) { + broadcast_dimensions = array, + known_expanding_dimensions = array, + known_non_expanding_dimensions = array + } : (tensor<1x3xi64>, tensor<3xi64>) -> tensor<2x3x2xi64> + ``` }]; + let arguments = (ins MHLO_Tensor:$operand, MHLO_DimensionTensor:$output_dimensions, @@ -2854,16 +2865,17 @@ def MHLO_ReshapeOp: MHLO_Op<"reshape", def MHLO_DynamicReshapeOp: MHLO_ShapedInterfaceOp<"dynamic_reshape", [Pure]> { let summary = "DynamicReshape operation"; let description = [{ - This operation is a work in progress, so it is not yet included in - the specification: https://github.com/openxla/stablehlo/issues/8. + This operation is functionally identical to + [reshape](https://github.com/openxla/stablehlo/blob/main/docs/spec.md#reshape) + op, but the result shape is specified dynamically via `output_shape`. 
- Informally, this operation does the same thing as ReshapeOp except that the - result shape is specified dynamically via `output_shape`: - https://github.com/openxla/stablehlo/blob/main/docs/spec.md#reshape + See: + https://github.com/openxla/stablehlo/blob/main/docs/spec.md#dynamic_reshape Example: ```mlir - %0 = mhlo.dynamic_reshape %arg0, %shape : (tensor, tensor<2xindex>) -> tensor + %output_shape = mhlo.constant dense<[3, 2]> : tensor<2xi64> + %result = mhlo.dynamic_reshape %operand, %output_shape : (tensor<2x3xi64>, tensor<2xi64>) -> tensor<3x2xi64> ``` }]; @@ -3795,7 +3807,7 @@ def MHLO_DynamicGatherOp: MHLO_Op<"dynamic_gather", let arguments = (ins MHLO_Tensor:$operand, MHLO_IntTensor:$start_indices, - MHLO_IntTensor:$slice_sizes, + MHLO_Static1DIntTensor:$slice_sizes, MHLO_GatherDimensionNumbers:$dimension_numbers, DefaultValuedOptionalAttr:$indices_are_sorted ); diff --git a/third_party/xla/xla/mlir_hlo/mhlo/transforms/map_stablehlo_to_hlo_op.h b/third_party/xla/xla/mlir_hlo/mhlo/transforms/map_stablehlo_to_hlo_op.h index 031ce586e80a5c..80c6c0b0bd1ea7 100644 --- a/third_party/xla/xla/mlir_hlo/mhlo/transforms/map_stablehlo_to_hlo_op.h +++ b/third_party/xla/xla/mlir_hlo/mhlo/transforms/map_stablehlo_to_hlo_op.h @@ -150,7 +150,6 @@ MAP_STABLEHLO_TO_HLO(SqrtOp) MAP_STABLEHLO_TO_HLO(SubtractOp) MAP_STABLEHLO_TO_HLO(TanhOp) MAP_STABLEHLO_TO_HLO(TorchIndexSelectOp) -MAP_STABLEHLO_TO_HLO(TraceOp) MAP_STABLEHLO_TO_HLO(TransposeOp) MAP_STABLEHLO_TO_HLO(TriangularSolveOp) MAP_STABLEHLO_TO_HLO(TupleOp) diff --git a/third_party/xla/xla/mlir_hlo/tests/Dialect/mhlo/hlo-legalize-to-stablehlo.mlir b/third_party/xla/xla/mlir_hlo/tests/Dialect/mhlo/hlo-legalize-to-stablehlo.mlir index b624a4341dba83..32f555f88cc1cc 100644 --- a/third_party/xla/xla/mlir_hlo/tests/Dialect/mhlo/hlo-legalize-to-stablehlo.mlir +++ b/third_party/xla/xla/mlir_hlo/tests/Dialect/mhlo/hlo-legalize-to-stablehlo.mlir @@ -821,21 +821,19 @@ func.func @op_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xind } // CHECK-LABEL: "op_dynamic_conv" -func.func @op_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> { +func.func @op_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<2x2xi32>) -> tensor<1x?x?x16xf32> { // CHECK: "stablehlo.dynamic_conv"([[ARG0:%arg[0-9]+]], [[ARG1:%arg[0-9]+]], [[ARG2:%arg[0-9]+]]) <{ // CHECK-SAME: batch_group_count = 1 : i64, // CHECK-SAME: dimension_numbers = #stablehlo.conv<[b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f]>, // CHECK-SAME: feature_group_count = 1 : i64, // CHECK-SAME: lhs_dilation = array, - // CHECK-SAME: padding = dense<1> : tensor<2x2xi64>, // CHECK-SAME: precision_config = [#stablehlo, #stablehlo], // CHECK-SAME: rhs_dilation = array, // CHECK-SAME: window_reversal = array, // CHECK-SAME: window_strides = array - // CHECK-SAME: }> : (tensor<1x8x8x207xf32>, tensor<3x3x207x16xf32>, tensor<4xi32>) -> tensor<1x?x?x16xf32> + // CHECK-SAME: }> : (tensor<1x8x8x207xf32>, tensor<3x3x207x16xf32>, tensor<2x2xi32>) -> tensor<1x?x?x16xf32> %0 = "mhlo.dynamic_conv"(%arg0, %arg1, %arg2) { window_strides = dense<1> : tensor<2xi64>, - padding = dense<1> : tensor<2x2xi64>, lhs_dilation = dense<1> : tensor<2xi64>, rhs_dilation = dense<1> : tensor<2xi64>, window_reversal = dense : tensor<2xi1>, @@ -843,7 +841,7 @@ func.func @op_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x1 feature_group_count = 1 : i64, batch_group_count = 1 : i64, precision_config = [#mhlo, #mhlo] - } : 
(tensor<1x8x8x207xf32>, tensor<3x3x207x16xf32>, tensor<4xi32>) -> tensor<1x?x?x16xf32> + } : (tensor<1x8x8x207xf32>, tensor<3x3x207x16xf32>, tensor<2x2xi32>) -> tensor<1x?x?x16xf32> func.return %0 : tensor<1x?x?x16xf32> } @@ -1604,17 +1602,6 @@ func.func @op_torch_index_select(%arg0: tensor<5x1x5xf32>, %arg1: tensor<2xi32>) func.return %0 : tensor<2x1x5xf32> } -// CHECK-LABEL: "op_trace" -func.func @op_trace(%arg0: tensor) { - // CHECK: "stablehlo.trace"([[ARG0:%arg[0-9]+]]) <{ - // CHECK-SAME: tag = "foo" - // CHECK-SAME: }> : (tensor) -> () - "mhlo.trace"(%arg0) { - tag = "foo" - } : (tensor) -> () - func.return -} - // CHECK-LABEL: "op_transpose" func.func @op_transpose(%arg0: tensor<16x8xf32>) -> tensor<8x16xf32> { // CHECK: "stablehlo.transpose"([[ARG0:%arg[0-9]+]]) <{ diff --git a/third_party/xla/xla/mlir_hlo/tests/Dialect/mhlo/mhlo-quant-legalize-to-int.mlir b/third_party/xla/xla/mlir_hlo/tests/Dialect/mhlo/mhlo-quant-legalize-to-int.mlir index 7d9455a5535ac9..6b0404881d9e2b 100644 --- a/third_party/xla/xla/mlir_hlo/tests/Dialect/mhlo/mhlo-quant-legalize-to-int.mlir +++ b/third_party/xla/xla/mlir_hlo/tests/Dialect/mhlo/mhlo-quant-legalize-to-int.mlir @@ -697,7 +697,7 @@ func.func @dot_dynamic_batch_dim( // CHECK-LABEL: func @dot_general func.func @dot_general( %arg0: tensor<2x5x6x!quant.uniform>, - %arg1: tensor<6x8x2x!quant.uniform> + %arg1: tensor<6x8x2x!quant.uniform> ) -> tensor<2x5x8x!quant.uniform> { // CHECK: %[[DOT_RES:.*]] = "mhlo.dot_general" // CHECK-SAME: lhs_batching_dimensions = [0] @@ -705,22 +705,7 @@ func.func @dot_general( // CHECK-SAME: lhs_contracting_dimensions = [2] // CHECK-SAME: rhs_contracting_dimensions = [0] - // Zero point offset contribution from LHS tensor * RHS ZP. - - // CHECK: %[[LHS_I32:.*]] = mhlo.convert %[[LHS:.*]] : (tensor<2x5x6xi8>) - // CHECK-SAME: -> tensor<2x5x6xi32> - // CHECK: %[[LHS_REDUCE_INIT:.*]] = mhlo.constant dense<0> : tensor - // CHECK: %[[LHS_REDUCE:.*]] = mhlo.reduce(%[[LHS_I32]] init: %[[LHS_REDUCE_INIT]]) - // CHECK-SAME: applies mhlo.add across dimensions = [2] - // CHECK-SAME: (tensor<2x5x6xi32>, tensor) - // CHECK-SAME: -> tensor<2x5xi32> - // CHECK: %[[RHS_ZP:.*]] = mhlo.constant dense<5> : tensor - // CHECK: %[[LHS_ZP_CONTRIB:.*]] = chlo.broadcast_multiply - // CHECK-SAME: %[[LHS_REDUCE]], %[[RHS_ZP]] : - // CHECK-SAME: (tensor<2x5xi32>, tensor) -> tensor<2x5xi32> - // CHECK: %[[LHS_ZP_BCAST:.*]] = "mhlo.broadcast_in_dim"(%[[LHS_ZP_CONTRIB]]) - // CHECK-SAME: broadcast_dimensions = dense<[0, 1]> - // CHECK-SAME: (tensor<2x5xi32>) -> tensor<2x5x8xi32> + // Zero point offset contribution from LHS tensor * RHS ZP is 0 and skipped. // Zero point offset contribution from RHS tensor * LHS ZP. @@ -738,13 +723,8 @@ func.func @dot_general( // CHECK: %[[RHS_ZP_BCAST:.*]] = "mhlo.broadcast_in_dim"(%[[RHS_ZP_CONTRIB]]) // CHECK-SAME: broadcast_dimensions = dense<[2, 0]> // CHECK-SAME: (tensor<8x2xi32>) -> tensor<2x5x8xi32> - // CHECK: %[[ZP_TOTAL_1:.*]] = mhlo.add %[[LHS_ZP_BCAST]], %[[RHS_ZP_BCAST]] - - // Zero point offset contribution from LHS ZP * RHS ZP. - // CHECK: %[[ZPS:.*]] = mhlo.constant dense<90> : tensor - // CHECK: %[[ZP_TOTAL_2:.*]] = chlo.broadcast_subtract %[[ZP_TOTAL_1]], %[[ZPS]] - // CHECK-SAME: (tensor<2x5x8xi32>, tensor) -> tensor<2x5x8xi32> + // Zero point offset contribution from LHS ZP * RHS ZP is 0 and skipped. // Combine dot result with zero point offset and output final result. 
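A note on the CHECK deletions above: they follow from the zero-point algebra of quantized dot_general. Writing the quantized operands as A (LHS, zero point z_A = 3 in these tests) and B (RHS, zero point z_B, changed from 5 to 0 by this update), and letting K denote the product of the contracting-dimension sizes, the accumulation expands as follows. This is standard uniform-quantization algebra; the symbols are this note's notation, not names from the lowering:

    \sum_k (A_k - z_A)(B_k - z_B)
        = \sum_k A_k B_k          % integer dot result
        - z_B \sum_k A_k          % "LHS tensor * RHS ZP" term
        - z_A \sum_k B_k          % "RHS tensor * LHS ZP" term
        + K z_A z_B               % "LHS ZP * RHS ZP" term

With the old z_B = 5 and K = 6, the constant term was 3 * 5 * 6 = 90 (the deleted dense<90>); in @dot_general_multiple_batching_dims below, K = 6 * 7 = 42 gives the deleted dense<630>. Once z_B = 0, the second and fourth terms vanish identically, so only the "RHS tensor * LHS ZP" correction remains to be emitted, which is exactly the CHECK block that survives.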
@@ -756,17 +736,17 @@ func.func @dot_general( // CHECK: %[[RES_INT:.*]] = mhlo.convert %[[RES_FP_1]] // CHECK-SAME: (tensor<2x5x8xf32>) -> tensor<2x5x8xi32> - // CHECK: %[[ZP_TOTAL_3:.*]] = mhlo.convert %[[ZP_TOTAL_2]] + // CHECK: %[[ZP_TOTAL_1:.*]] = mhlo.convert %[[RHS_ZP_BCAST]] // CHECK-SAME: (tensor<2x5x8xi32>) -> tensor<2x5x8xf32> - // CHECK: %[[ZP_TOTAL_4:.*]] = chlo.broadcast_multiply - // CHECK-SAME: %[[ZP_TOTAL_3:.*]], %[[COMBINED_SCALE]] - // CHECK: %[[ZP_TOTAL_5:.*]] = mhlo.convert %[[ZP_TOTAL_4]] + // CHECK: %[[ZP_TOTAL_2:.*]] = chlo.broadcast_multiply + // CHECK-SAME: %[[ZP_TOTAL_1:.*]], %[[COMBINED_SCALE]] + // CHECK: %[[ZP_TOTAL_3:.*]] = mhlo.convert %[[ZP_TOTAL_2]] // CHECK-SAME: (tensor<2x5x8xf32>) -> tensor<2x5x8xi32> // CHECK: %[[RES_ZP:.*]] = mhlo.constant dense<7> : tensor - // CHECK: %[[ZP_TOTAL_6:.*]] = chlo.broadcast_subtract %[[RES_ZP]], %[[ZP_TOTAL_5]] + // CHECK: %[[ZP_TOTAL_4:.*]] = chlo.broadcast_subtract %[[RES_ZP]], %[[ZP_TOTAL_3]] // CHECK-SAME: (tensor, tensor<2x5x8xi32>) -> tensor<2x5x8xi32> - // CHECK: chlo.broadcast_add %[[RES_INT]], %[[ZP_TOTAL_6]] + // CHECK: chlo.broadcast_add %[[RES_INT]], %[[ZP_TOTAL_4]] %0 = "mhlo.dot_general" (%arg0, %arg1) { dot_dimension_numbers = #mhlo.dot< @@ -776,7 +756,7 @@ func.func @dot_general( rhs_contracting_dimensions = [0] >} : ( tensor<2x5x6x!quant.uniform>, - tensor<6x8x2x!quant.uniform> + tensor<6x8x2x!quant.uniform> ) -> tensor<2x5x8x!quant.uniform> return %0 : tensor<2x5x8x!quant.uniform> } @@ -786,7 +766,7 @@ func.func @dot_general( // CHECK-LABEL: func @dot_general_combined_scale_1 func.func @dot_general_combined_scale_1( %arg0: tensor<2x5x6x!quant.uniform>, - %arg1: tensor<6x8x2x!quant.uniform> + %arg1: tensor<6x8x2x!quant.uniform> ) -> tensor<2x5x8x!quant.uniform> { // CHECK: %[[DOT_RES:.*]] = "mhlo.dot_general" // CHECK-SAME: lhs_batching_dimensions = [0] @@ -794,22 +774,7 @@ func.func @dot_general_combined_scale_1( // CHECK-SAME: lhs_contracting_dimensions = [2] // CHECK-SAME: rhs_contracting_dimensions = [0] - // Zero point offset contribution from LHS tensor * RHS ZP. - - // CHECK: %[[LHS_I32:.*]] = mhlo.convert %[[LHS:.*]] : (tensor<2x5x6xi8>) - // CHECK-SAME: -> tensor<2x5x6xi32> - // CHECK: %[[LHS_REDUCE_INIT:.*]] = mhlo.constant dense<0> : tensor - // CHECK: %[[LHS_REDUCE:.*]] = mhlo.reduce(%[[LHS_I32]] init: %[[LHS_REDUCE_INIT]]) - // CHECK-SAME: applies mhlo.add across dimensions = [2] - // CHECK-SAME: (tensor<2x5x6xi32>, tensor) - // CHECK-SAME: -> tensor<2x5xi32> - // CHECK: %[[RHS_ZP:.*]] = mhlo.constant dense<5> : tensor - // CHECK: %[[LHS_ZP_CONTRIB:.*]] = chlo.broadcast_multiply - // CHECK-SAME: %[[LHS_REDUCE]], %[[RHS_ZP]] : - // CHECK-SAME: (tensor<2x5xi32>, tensor) -> tensor<2x5xi32> - // CHECK: %[[LHS_ZP_BCAST:.*]] = "mhlo.broadcast_in_dim"(%[[LHS_ZP_CONTRIB]]) - // CHECK-SAME: broadcast_dimensions = dense<[0, 1]> - // CHECK-SAME: (tensor<2x5xi32>) -> tensor<2x5x8xi32> + // Zero point offset contribution from LHS tensor * RHS ZP is 0 and skipped. // Zero point offset contribution from RHS tensor * LHS ZP. @@ -827,21 +792,11 @@ func.func @dot_general_combined_scale_1( // CHECK: %[[RHS_ZP_BCAST:.*]] = "mhlo.broadcast_in_dim"(%[[RHS_ZP_CONTRIB]]) // CHECK-SAME: broadcast_dimensions = dense<[2, 0]> // CHECK-SAME: (tensor<8x2xi32>) -> tensor<2x5x8xi32> - // CHECK: %[[ZP_TOTAL_1:.*]] = mhlo.add %[[LHS_ZP_BCAST]], %[[RHS_ZP_BCAST]] - - // Zero point offset contribution from LHS ZP * RHS ZP. 
- - // CHECK: %[[ZPS:.*]] = mhlo.constant dense<90> : tensor - // CHECK: %[[ZP_TOTAL_2:.*]] = chlo.broadcast_subtract %[[ZP_TOTAL_1]], %[[ZPS]] - // CHECK-SAME: (tensor<2x5x8xi32>, tensor) -> tensor<2x5x8xi32> - - // Combine dot result with zero point offset and output final result. - // Do not multiply by combined scale since it is 1.0 and thus no-op. // CHECK: %[[RES_ZP:.*]] = mhlo.constant dense<7> : tensor - // CHECK: %[[ZP_TOTAL_3:.*]] = chlo.broadcast_subtract %[[RES_ZP]], %[[ZP_TOTAL_2]] + // CHECK: %[[ZP_TOTAL_1:.*]] = chlo.broadcast_subtract %[[RES_ZP]], %[[RHS_ZP_BCAST]] // CHECK-SAME: (tensor, tensor<2x5x8xi32>) -> tensor<2x5x8xi32> - // CHECK: chlo.broadcast_add %[[DOT_RES]], %[[ZP_TOTAL_3]] + // CHECK: chlo.broadcast_add %[[DOT_RES]], %[[ZP_TOTAL_1]] %0 = "mhlo.dot_general" (%arg0, %arg1) { dot_dimension_numbers = #mhlo.dot< @@ -851,7 +806,7 @@ func.func @dot_general_combined_scale_1( rhs_contracting_dimensions = [0] >} : ( tensor<2x5x6x!quant.uniform>, - tensor<6x8x2x!quant.uniform> + tensor<6x8x2x!quant.uniform> ) -> tensor<2x5x8x!quant.uniform> return %0 : tensor<2x5x8x!quant.uniform> } @@ -861,7 +816,7 @@ func.func @dot_general_combined_scale_1( // CHECK-LABEL: func @dot_general_multiple_batching_dims func.func @dot_general_multiple_batching_dims( %arg0: tensor<2x5x3x7x6x!quant.uniform>, - %arg1: tensor<6x2x7x8x3x!quant.uniform> + %arg1: tensor<6x2x7x8x3x!quant.uniform> ) -> tensor<2x3x5x8x!quant.uniform> { // CHECK: %[[DOT_RES:.*]] = "mhlo.dot_general" // CHECK-SAME: lhs_batching_dimensions = [0, 2] @@ -869,22 +824,6 @@ func.func @dot_general_multiple_batching_dims( // CHECK-SAME: lhs_contracting_dimensions = [4, 3] // CHECK-SAME: rhs_contracting_dimensions = [0, 2]>} - // Zero point offset contribution from LHS tensor * RHS ZP. - - // CHECK: %[[LHS_I32:.*]] = mhlo.convert %[[LHS:.*]] : (tensor<2x5x3x7x6xi8>) - // CHECK-SAME: -> tensor<2x5x3x7x6xi32> - // CHECK: %[[LHS_REDUCE_INIT:.*]] = mhlo.constant dense<0> : tensor - // CHECK: %[[LHS_REDUCE:.*]] = mhlo.reduce(%[[LHS_I32]] init: %[[LHS_REDUCE_INIT]]) - // CHECK-SAME: applies mhlo.add across dimensions = [4, 3] - // CHECK-SAME: (tensor<2x5x3x7x6xi32>, tensor) - // CHECK-SAME: -> tensor<2x5x3xi32> - // CHECK: %[[RHS_ZP:.*]] = mhlo.constant dense<5> : tensor - // CHECK: %[[LHS_ZP_CONTRIB:.*]] = chlo.broadcast_multiply - // CHECK-SAME: %[[LHS_REDUCE]], %[[RHS_ZP]] : - // CHECK-SAME: (tensor<2x5x3xi32>, tensor) -> tensor<2x5x3xi32> - // CHECK: %[[LHS_ZP_BCAST:.*]] = "mhlo.broadcast_in_dim"(%[[LHS_ZP_CONTRIB]]) - // CHECK-SAME: broadcast_dimensions = dense<[0, 2, 1]> - // CHECK-SAME: (tensor<2x5x3xi32>) -> tensor<2x3x5x8xi32> // Zero point offset contribution from RHS tensor * LHS ZP. @@ -902,13 +841,7 @@ func.func @dot_general_multiple_batching_dims( // CHECK: %[[RHS_ZP_BCAST:.*]] = "mhlo.broadcast_in_dim"(%[[RHS_ZP_CONTRIB]]) // CHECK-SAME: broadcast_dimensions = dense<[0, 3, 1]> // CHECK-SAME: (tensor<2x8x3xi32>) -> tensor<2x3x5x8xi32> - // CHECK: %[[ZP_TOTAL_1:.*]] = mhlo.add %[[LHS_ZP_BCAST]], %[[RHS_ZP_BCAST]] - // Zero point offset contribution from LHS ZP * RHS ZP. - - // CHECK: %[[ZPS:.*]] = mhlo.constant dense<630> : tensor - // CHECK: %[[ZP_TOTAL_2:.*]] = chlo.broadcast_subtract %[[ZP_TOTAL_1]], %[[ZPS]] - // CHECK-SAME: (tensor<2x3x5x8xi32>, tensor) -> tensor<2x3x5x8xi32> // Combine dot result with zero point offset and output final result. 
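For reference, the requantization step exercised by these tests applies a combined scale before re-centering on the output zero point. In the same notation as the note above (standard quantization algebra, not names from the lowering):

    result_q = round( (s_lhs * s_rhs / s_out) * (integer dot - zero-point corrections) ) + z_out

In @dot_general_combined_scale_1 the combined scale s_lhs * s_rhs / s_out is exactly 1, so the convert/multiply/convert round trip is a no-op and is dropped; the CHECKs accordingly go straight from %[[DOT_RES]] to the broadcast_subtract against %[[RES_ZP]] (the output zero point, dense<7>).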
@@ -920,17 +853,17 @@ func.func @dot_general_multiple_batching_dims( // CHECK: %[[RES_INT:.*]] = mhlo.convert %[[RES_FP_1]] // CHECK-SAME: (tensor<2x3x5x8xf32>) -> tensor<2x3x5x8xi32> - // CHECK: %[[ZP_TOTAL_3:.*]] = mhlo.convert %[[ZP_TOTAL_2]] + // CHECK: %[[ZP_TOTAL_1:.*]] = mhlo.convert %[[RHS_ZP_BCAST]] // CHECK-SAME: (tensor<2x3x5x8xi32>) -> tensor<2x3x5x8xf32> - // CHECK: %[[ZP_TOTAL_4:.*]] = chlo.broadcast_multiply - // CHECK-SAME: %[[ZP_TOTAL_3:.*]], %[[COMBINED_SCALE]] - // CHECK: %[[ZP_TOTAL_5:.*]] = mhlo.convert %[[ZP_TOTAL_4]] + // CHECK: %[[ZP_TOTAL_2:.*]] = chlo.broadcast_multiply + // CHECK-SAME: %[[ZP_TOTAL_1:.*]], %[[COMBINED_SCALE]] + // CHECK: %[[ZP_TOTAL_3:.*]] = mhlo.convert %[[ZP_TOTAL_2]] // CHECK-SAME: (tensor<2x3x5x8xf32>) -> tensor<2x3x5x8xi32> // CHECK: %[[RES_ZP:.*]] = mhlo.constant dense<7> : tensor - // CHECK: %[[ZP_TOTAL_6:.*]] = chlo.broadcast_subtract %[[RES_ZP]], %[[ZP_TOTAL_5]] + // CHECK: %[[ZP_TOTAL_4:.*]] = chlo.broadcast_subtract %[[RES_ZP]], %[[ZP_TOTAL_3]] // CHECK-SAME: (tensor, tensor<2x3x5x8xi32>) -> tensor<2x3x5x8xi32> - // CHECK: chlo.broadcast_add %[[RES_INT]], %[[ZP_TOTAL_6]] + // CHECK: chlo.broadcast_add %[[RES_INT]], %[[ZP_TOTAL_4]] %0 = "mhlo.dot_general" (%arg0, %arg1) { dot_dimension_numbers = #mhlo.dot< @@ -940,7 +873,7 @@ func.func @dot_general_multiple_batching_dims( rhs_contracting_dimensions = [0, 2] >} : ( tensor<2x5x3x7x6x!quant.uniform>, - tensor<6x2x7x8x3x!quant.uniform> + tensor<6x2x7x8x3x!quant.uniform> ) -> tensor<2x3x5x8x!quant.uniform> return %0 : tensor<2x3x5x8x!quant.uniform> } @@ -1058,7 +991,7 @@ func.func @dot_general_zero_zp( // CHECK-LABEL: func @dot_general_multiple_dynamic_dims func.func @dot_general_multiple_dynamic_dims( %arg0: tensor>, - %arg1: tensor<6x?x?x8x3x!quant.uniform> + %arg1: tensor<6x?x?x8x3x!quant.uniform> ) -> tensor> { // CHECK: %[[DOT_RES:.*]] = "mhlo.dot_general" // CHECK-SAME: lhs_batching_dimensions = [0, 2] @@ -1068,17 +1001,17 @@ func.func @dot_general_multiple_dynamic_dims( // Zero point offset contribution from LHS tensor * RHS ZP. - // CHECK: %[[LHS_I32:.*]] = mhlo.convert %[[LHS:.*]] : (tensor) - // CHECK-SAME: -> tensor - // CHECK: %[[LHS_REDUCE_INIT:.*]] = mhlo.constant dense<0> : tensor - // CHECK: %[[LHS_REDUCE:.*]] = mhlo.reduce(%[[LHS_I32]] init: %[[LHS_REDUCE_INIT]]) - // CHECK-SAME: applies mhlo.add across dimensions = [4, 3] - // CHECK-SAME: (tensor, tensor) - // CHECK-SAME: -> tensor - // CHECK: %[[RHS_ZP:.*]] = mhlo.constant dense<5> : tensor - // CHECK: %[[LHS_ZP_CONTRIB:.*]] = chlo.broadcast_multiply - // CHECK-SAME: %[[LHS_REDUCE]], %[[RHS_ZP]] : - // CHECK-SAME: (tensor, tensor) -> tensor + // CHECK: %[[RHS_I32:.*]] = mhlo.convert %[[RHS:.*]] : (tensor<6x?x?x8x3xi8>) + // CHECK-SAME: -> tensor<6x?x?x8x3xi32> + // CHECK: %[[RHS_REDUCE_INIT:.*]] = mhlo.constant dense<0> : tensor + // CHECK: %[[RHS_REDUCE:.*]] = mhlo.reduce(%[[RHS_I32]] init: %[[RHS_REDUCE_INIT]]) + // CHECK-SAME: applies mhlo.add across dimensions = [0, 2] + // CHECK-SAME: (tensor<6x?x?x8x3xi32>, tensor) + // CHECK-SAME: -> tensor + // CHECK: %[[RHS_ZP:.*]] = mhlo.constant dense<3> : tensor + // CHECK: %[[RHS_ZP_CONTRIB:.*]] = chlo.broadcast_multiply + // CHECK-SAME: %[[RHS_REDUCE]], %[[RHS_ZP]] : + // CHECK-SAME: (tensor, tensor) -> tensor // Calculate output dynamic dims. 
// CHECK: %[[DIM_1_1:.*]] = "mhlo.get_dimension_size"(%[[DOT_RES]]) @@ -1094,40 +1027,10 @@ func.func @dot_general_multiple_dynamic_dims( // CHECK: %[[OUTPUT_DIMS:.*]] = "mhlo.concatenate" // CHECK-SAME: %[[DIM_1]], %[[DIM_2]], %[[DIM_3]], %[[DIM_4]] - // CHECK: %[[LHS_ZP_BCAST:.*]] = "mhlo.dynamic_broadcast_in_dim" - // CHECK-SAME: (%[[LHS_ZP_CONTRIB]], %[[OUTPUT_DIMS]]) - // CHECK-SAME: broadcast_dimensions = dense<[0, 2, 1]> - // CHECK-SAME: (tensor, tensor<4xi64>) -> tensor - - // Zero point offset contribution from RHS tensor * LHS ZP. - - // CHECK: %[[RHS_I32:.*]] = mhlo.convert %[[RHS:.*]] : (tensor<6x?x?x8x3xi8>) - // CHECK-SAME: -> tensor<6x?x?x8x3xi32> - // CHECK: %[[RHS_REDUCE_INIT:.*]] = mhlo.constant dense<0> : tensor - // CHECK: %[[RHS_REDUCE:.*]] = mhlo.reduce(%[[RHS_I32]] init: %[[RHS_REDUCE_INIT]]) - // CHECK-SAME: applies mhlo.add across dimensions = [0, 2] - // CHECK-SAME: (tensor<6x?x?x8x3xi32>, tensor) - // CHECK-SAME: -> tensor - // CHECK: %[[RHS_ZP:.*]] = mhlo.constant dense<3> : tensor - // CHECK: %[[RHS_ZP_CONTRIB:.*]] = chlo.broadcast_multiply - // CHECK-SAME: %[[RHS_REDUCE]], %[[RHS_ZP]] : - // CHECK-SAME: (tensor, tensor) -> tensor - // CHECK: %[[RHS_ZP_BCAST:.*]] = "mhlo.dynamic_broadcast_in_dim" // CHECK-SAME: (%[[RHS_ZP_CONTRIB]], %[[OUTPUT_DIMS]]) // CHECK-SAME: broadcast_dimensions = dense<[0, 3, 1]> // CHECK-SAME: (tensor, tensor<4xi64>) -> tensor - // CHECK: %[[ZP_TOTAL_1:.*]] = mhlo.add %[[LHS_ZP_BCAST]], %[[RHS_ZP_BCAST]] - - // Zero point offset contribution from LHS ZP * RHS ZP. - - // CHECK: %[[ZPS_INIT:.*]] = mhlo.constant dense<1> : tensor - // CHECK: %[[DYN_DIM:.*]] = "mhlo.get_dimension_size"(%[[RHS]]) - // CHECK: %[[ZPS_1:.*]] = mhlo.multiply %[[ZPS_INIT]], %[[DYN_DIM]] - // CHECK: %[[STATIC_DIM:.*]] = mhlo.constant dense<90> : tensor - // CHECK: %[[ZPS:.*]] = mhlo.multiply %[[STATIC_DIM]], %[[ZPS_1]] - // CHECK: %[[ZP_TOTAL_2:.*]] = chlo.broadcast_subtract %[[ZP_TOTAL_1]], %[[ZPS]] - // CHECK-SAME: (tensor, tensor) -> tensor // Combine dot result with zero point offset and output final result. 
@@ -1139,17 +1042,17 @@ func.func @dot_general_multiple_dynamic_dims( // CHECK: %[[RES_INT:.*]] = mhlo.convert %[[RES_FP_1]] // CHECK-SAME: (tensor) -> tensor - // CHECK: %[[ZP_TOTAL_3:.*]] = mhlo.convert %[[ZP_TOTAL_2]] + // CHECK: %[[ZP_TOTAL_1:.*]] = mhlo.convert %[[RHS_ZP_BCAST]] // CHECK-SAME: (tensor) -> tensor - // CHECK: %[[ZP_TOTAL_4:.*]] = chlo.broadcast_multiply - // CHECK-SAME: %[[ZP_TOTAL_3:.*]], %[[COMBINED_SCALE]] - // CHECK: %[[ZP_TOTAL_5:.*]] = mhlo.convert %[[ZP_TOTAL_4]] + // CHECK: %[[ZP_TOTAL_2:.*]] = chlo.broadcast_multiply + // CHECK-SAME: %[[ZP_TOTAL_1:.*]], %[[COMBINED_SCALE]] + // CHECK: %[[ZP_TOTAL_3:.*]] = mhlo.convert %[[ZP_TOTAL_2]] // CHECK-SAME: (tensor) -> tensor // CHECK: %[[RES_ZP:.*]] = mhlo.constant dense<7> : tensor - // CHECK: %[[ZP_TOTAL_6:.*]] = chlo.broadcast_subtract %[[RES_ZP]], %[[ZP_TOTAL_5]] + // CHECK: %[[ZP_TOTAL_4:.*]] = chlo.broadcast_subtract %[[RES_ZP]], %[[ZP_TOTAL_3]] // CHECK-SAME: (tensor, tensor) -> tensor - // CHECK: chlo.broadcast_add %[[RES_INT]], %[[ZP_TOTAL_6]] + // CHECK: chlo.broadcast_add %[[RES_INT]], %[[ZP_TOTAL_4]] %0 = "mhlo.dot_general" (%arg0, %arg1) { dot_dimension_numbers = #mhlo.dot< @@ -1159,7 +1062,7 @@ func.func @dot_general_multiple_dynamic_dims( rhs_contracting_dimensions = [0, 2] >} : ( tensor>, - tensor<6x?x?x8x3x!quant.uniform> + tensor<6x?x?x8x3x!quant.uniform> ) -> tensor> return %0 : tensor> } @@ -1811,14 +1714,13 @@ func.func @dot_general_hybrid_per_channel( // CHECK-SAME: %[[ARG1:.*]]: tensor<2x2xi8> func.func @dot_general_hybrid_per_channel_asymmetric( %arg0: tensor<3x2xf32>, - %arg1: tensor<2x2x!quant.uniform:f32:1, {3.000000e+00:10, 4.000000e+00:20}>> + %arg1: tensor<2x2x!quant.uniform:f32:1, {3.000000e+00:0, 4.000000e+00:0}>> ) -> tensor<3x2xf32> { // CHECK-DAG: %[[BARRIER:.*]] = mhlo.optimization_barrier %[[ARG1]] : tensor<2x2xi8> // CHECK-DAG: %[[SCALES:.*]] = mhlo.constant dense<[3.000000e+00, 4.000000e+00]> : tensor<2xf32> - // CHECK-DAG: %[[ZPS:.*]] = mhlo.constant dense<[1.000000e+01, 2.000000e+01]> : tensor<2xf32> + // CHECK-DAG: %[[ZPS:.*]] = mhlo.constant dense<0.000000e+00> : tensor<2xf32> // CHECK-DAG: %[[CONVERT:.*]] = mhlo.convert %[[BARRIER]] : (tensor<2x2xi8>) -> tensor<2x2xf32> - // CHECK: %[[SUB:.*]] = chlo.broadcast_subtract %[[CONVERT]], %[[ZPS]] {broadcast_dimensions = array} : (tensor<2x2xf32>, tensor<2xf32>) -> tensor<2x2xf32> - // CHECK: %[[MUL:.*]] = chlo.broadcast_multiply %[[SUB]], %[[SCALES]] {broadcast_dimensions = array} : (tensor<2x2xf32>, tensor<2xf32>) -> tensor<2x2xf32> + // CHECK: %[[MUL:.*]] = chlo.broadcast_multiply %[[CONVERT]], %[[SCALES]] {broadcast_dimensions = array} : (tensor<2x2xf32>, tensor<2xf32>) -> tensor<2x2xf32> // CHECK: %[[DOT:.*]] = "mhlo.dot_general"(%[[ARG0]], %[[MUL]]) // CHECK-SAME: (tensor<3x2xf32>, tensor<2x2xf32>) -> tensor<3x2xf32> // CHECK: return %[[DOT]] @@ -1827,7 +1729,7 @@ func.func @dot_general_hybrid_per_channel_asymmetric( dot_dimension_numbers = #mhlo.dot} : ( tensor<3x2xf32>, - tensor<2x2x!quant.uniform:f32:1, {3.000000e+00:10, 4.000000e+00:20}>> + tensor<2x2x!quant.uniform:f32:1, {3.000000e+00:0, 4.000000e+00:0}>> ) -> tensor<3x2xf32> return %0 : tensor<3x2xf32> } @@ -1989,9 +1891,9 @@ func.func @conv2d_hybrid_result_not_float( // ----- -func.func @dot_general_hybrid_result_not_float( - %arg0: tensor<2x5x6xf32>, - %arg1: tensor<6x8x2x!quant.uniform>) { +func.func @dot_general_non_hybrid_result_not_float( + %arg0: tensor<2x5x6x!quant.uniform>, + %arg1: tensor<6x8x2x!quant.uniform>) { // expected-error@+2 {{Invalid input/output 
type for Dot/Convolution op}} // expected-error@+1 {{failed to legalize operation 'mhlo.dot_general' that was explicitly marked illegal}} %0 = "mhlo.dot_general" (%arg0, %arg1) { @@ -2001,8 +1903,8 @@ func.func @dot_general_hybrid_result_not_float( lhs_contracting_dimensions = [2], rhs_contracting_dimensions = [0] >} : ( - tensor<2x5x6xf32>, - tensor<6x8x2x!quant.uniform> + tensor<2x5x6x!quant.uniform>, + tensor<6x8x2x!quant.uniform> ) -> tensor<2x5x8x!quant.uniform> return } diff --git a/third_party/xla/xla/mlir_hlo/tests/Dialect/mhlo/ops.mlir b/third_party/xla/xla/mlir_hlo/tests/Dialect/mhlo/ops.mlir index 3f6e71bee0b7ff..b4bdfba82540b2 100644 --- a/third_party/xla/xla/mlir_hlo/tests/Dialect/mhlo/ops.mlir +++ b/third_party/xla/xla/mlir_hlo/tests/Dialect/mhlo/ops.mlir @@ -3449,7 +3449,7 @@ func.func @compatible_shapes(%arg0: tensor, %shape: tensor<2xindex>) -> t // ----- func.func @incompatible_shapes(%arg0: tensor, %shape: tensor<2xindex>) -> tensor { - // expected-error @+1 {{output should have a rank equal to the number of elements in output_shape}} + // expected-error @+1 {{result should have a rank equal to the number of elements in output_shape}} %0 = "mhlo.dynamic_reshape"(%arg0, %shape) : (tensor, tensor<2xindex>) -> tensor func.return %0 : tensor } @@ -4061,7 +4061,7 @@ func.func @dynamic_gather(%operand : tensor<2x4x9xi32>, %start_indices : tensor< // ----- -func.func @dynamic_gather(%operand : tensor, %start_indices : tensor, %slice_sizes : tensor) -> tensor { +func.func @dynamic_gather(%operand : tensor, %start_indices : tensor, %slice_sizes : tensor<3xi32>) -> tensor { %res = "mhlo.dynamic_gather"(%operand, %start_indices, %slice_sizes) { dimension_numbers = #mhlo.gather< collapsed_slice_dims = [0, 1], @@ -4070,13 +4070,13 @@ func.func @dynamic_gather(%operand : tensor, %start_indices : tensor< start_index_map = [0, 1] >, indices_are_sorted = false - } : (tensor, tensor, tensor) -> tensor + } : (tensor, tensor, tensor<3xi32>) -> tensor func.return %res : tensor } // ----- -func.func @dynamic_gather(%operand : tensor<2x4x9xi32>, %start_indices : tensor, %slice_sizes : tensor) -> tensor { +func.func @dynamic_gather(%operand : tensor<2x4x9xi32>, %start_indices : tensor, %slice_sizes : tensor<3xi32>) -> tensor { %res = "mhlo.dynamic_gather"(%operand, %start_indices, %slice_sizes) { dimension_numbers = #mhlo.gather< collapsed_slice_dims = [0, 1], @@ -4085,14 +4085,31 @@ func.func @dynamic_gather(%operand : tensor<2x4x9xi32>, %start_indices : tensor< start_index_map = [0, 1] >, indices_are_sorted = false - } : (tensor<2x4x9xi32>, tensor, tensor) -> tensor + } : (tensor<2x4x9xi32>, tensor, tensor<3xi32>) -> tensor func.return %res : tensor } // ----- -func.func @dynamic_gather(%operand : tensor, %start_indices : tensor, %slice_sizes : tensor) -> tensor { - // @expected-error@+2 {{'mhlo.dynamic_gather' op failed to infer returned types}} +func.func @dynamic_gather_c1(%operand : tensor<2x4x9xi32>, %start_indices : tensor<1x5x2xi32>, %slice_sizes : tensor<3xi32>) -> tensor<1x5x8xi32> { + // expected-error@+2 {{failed to infer returned types}} + // expected-error@+1 {{offset_dims size (2) plus collapse_slice_dims size (2) is not equal to operand rank (3)}} + %res = "mhlo.dynamic_gather"(%operand, %start_indices, %slice_sizes) { + dimension_numbers = #mhlo.gather< + offset_dims = [1, 2], + collapsed_slice_dims = [0, 1], + start_index_map = [0, 1], + index_vector_dim = 2 + >, + indices_are_sorted = false + } : (tensor<2x4x9xi32>, tensor<1x5x2xi32>, tensor<3xi32>) -> tensor<1x5x8xi32> + 
func.return %res : tensor<1x5x8xi32> +} + +// ----- + +func.func @dynamic_gather_c2(%operand : tensor, %start_indices : tensor, %slice_sizes : tensor<3xi32>) -> tensor { + // expected-error@+2 {{failed to infer returned types}} // expected-error@+1 {{index_vector_dim 4 is out of bounds for start indices with rank 3}} %res = "mhlo.dynamic_gather"(%operand, %start_indices, %slice_sizes) { dimension_numbers = #mhlo.gather< @@ -4102,65 +4119,221 @@ func.func @dynamic_gather(%operand : tensor, %start_indices : tensor< start_index_map = [0, 1] >, indices_are_sorted = false - } : (tensor, tensor, tensor) -> tensor + } : (tensor, tensor, tensor<3xi32>) -> tensor + func.return %res : tensor +} + +// ----- + +func.func @dynamic_gather_c3(%operand : tensor, %start_indices : tensor, %slice_sizes : tensor<3xi32>) -> tensor { + // expected-error@+2 {{failed to infer returned types}} + // expected-error@+1 {{start_index_map size (1) is not equal to size of index dimension (2) of start_indices (2)}} + %res = "mhlo.dynamic_gather"(%operand, %start_indices, %slice_sizes) { + dimension_numbers = #mhlo.gather< + collapsed_slice_dims = [0, 1], + index_vector_dim = 2, + offset_dims = [2], + start_index_map = [0] + >, + indices_are_sorted = false + } : (tensor, tensor, tensor<3xi32>) -> tensor + func.return %res : tensor +} + +// ----- + +func.func @dynamic_gather_c4(%operand : tensor<2x4x9xi32>, %start_indices : tensor, %slice_sizes : tensor<3xi32>) -> tensor { + // expected-error@+2 {{failed to infer returned types}} + // expected-error@+1 {{expects offset_dims to be sorted, got: [2, 1]}} + %res = "mhlo.dynamic_gather"(%operand, %start_indices, %slice_sizes) { + dimension_numbers = #mhlo.gather< + collapsed_slice_dims = [0, 1], + index_vector_dim = 2, + offset_dims = [2, 1], + start_index_map = [0, 1] + >, + indices_are_sorted = false + } : (tensor<2x4x9xi32>, tensor, tensor<3xi32>) -> tensor func.return %res : tensor } // ----- -func.func @dynamic_gather(%operand : tensor, %start_indices : tensor, %slice_sizes : tensor) -> tensor { - // @expected-error@+2 {{'mhlo.dynamic_gather' op failed to infer returned types}} - // expected-error@+1 {{offset_dims size (2) plus collapse_slice_dims size (2) is not equal to operand rank (3)}} +func.func @dynamic_gather_c5(%operand : tensor<2x4x9xi32>, %start_indices : tensor, %slice_sizes : tensor<3xi32>) -> tensor { + // expected-error@+2 {{failed to infer returned types}} + // expected-error@+1 {{offset_dims[0]: -1 is out of bounds for implied result rank 3}} %res = "mhlo.dynamic_gather"(%operand, %start_indices, %slice_sizes) { dimension_numbers = #mhlo.gather< collapsed_slice_dims = [0, 1], index_vector_dim = 2, - offset_dims = [1, 2], + offset_dims = [-1], start_index_map = [0, 1] >, indices_are_sorted = false - } : (tensor, tensor, tensor) -> tensor + } : (tensor<2x4x9xi32>, tensor, tensor<3xi32>) -> tensor func.return %res : tensor } // ----- -func.func @dynamic_gather(%operand : tensor, %start_indices : tensor, %slice_sizes : tensor) -> tensor { - // @expected-error@+2 {{'mhlo.dynamic_gather' op failed to infer returned types}} - // expected-error@+1 {{start_index_map size (1) is not equal to size of index dimension (2) of start_indices (2)}} +func.func @dynamic_gather_c5(%operand : tensor<2x4x9xi32>, %start_indices : tensor, %slice_sizes : tensor<3xi32>) -> tensor { + // expected-error@+2 {{failed to infer returned types}} + // expected-error@+1 {{offset_dims[0]: 3 is out of bounds for implied result rank 3}} %res = "mhlo.dynamic_gather"(%operand, %start_indices, 
%slice_sizes) { dimension_numbers = #mhlo.gather< collapsed_slice_dims = [0, 1], index_vector_dim = 2, + offset_dims = [3], + start_index_map = [0, 1] + >, + indices_are_sorted = false + } : (tensor<2x4x9xi32>, tensor, tensor<3xi32>) -> tensor + func.return %res : tensor +} + +// ----- + +func.func @dynamic_gather_c6(%operand : tensor<2x4x9xi32>, %start_indices : tensor, %slice_sizes : tensor<3xi32>) -> tensor { + // expected-error@+2 {{failed to infer returned types}} + // expected-error@+1 {{expects collapsed_slice_dims to be sorted, got: [1, 0]}} + %res = "mhlo.dynamic_gather"(%operand, %start_indices, %slice_sizes) { + dimension_numbers = #mhlo.gather< + collapsed_slice_dims = [1, 0], + index_vector_dim = 2, offset_dims = [2], - start_index_map = [0] + start_index_map = [0, 1] >, indices_are_sorted = false - } : (tensor, tensor, tensor) -> tensor + } : (tensor<2x4x9xi32>, tensor, tensor<3xi32>) -> tensor func.return %res : tensor } // ----- -func.func @dynamic_gather(%operand : tensor, %start_indices : tensor, %slice_sizes : tensor) -> tensor { - // @expected-error@+2 {{'mhlo.dynamic_gather' op failed to infer returned types}} - // expected-error@+1 {{slice_sizes.rank != 1}} +func.func @dynamic_gather_c6(%operand : tensor<2x4x9xi32>, %start_indices : tensor, %slice_sizes : tensor<3xi32>) -> tensor { + // expected-error@+2 {{failed to infer returned types}} + // expected-error@+1 {{expects collapsed_slice_dims to not repeat, got: [1, 1]}} %res = "mhlo.dynamic_gather"(%operand, %start_indices, %slice_sizes) { dimension_numbers = #mhlo.gather< - collapsed_slice_dims = [0, 1], + collapsed_slice_dims = [1, 1], index_vector_dim = 2, offset_dims = [2], start_index_map = [0, 1] >, indices_are_sorted = false - } : (tensor, tensor, tensor) -> tensor + } : (tensor<2x4x9xi32>, tensor, tensor<3xi32>) -> tensor func.return %res : tensor } // ----- -func.func @dynamic_gather(%operand : tensor, %start_indices : tensor, %slice_sizes : tensor<2xi32>) -> tensor { - // @expected-error@+2 {{'mhlo.dynamic_gather' op failed to infer returned types}} +func.func @dynamic_gather_c7(%operand : tensor<2x4x9xi32>, %start_indices : tensor, %slice_sizes : tensor<3xi32>) -> tensor { + // expected-error@+2 {{failed to infer returned types}} + // expected-error@+1 {{collapsed dimension -1 is out of bounds for slice_sizes.size (3)}} + %res = "mhlo.dynamic_gather"(%operand, %start_indices, %slice_sizes) { + dimension_numbers = #mhlo.gather< + collapsed_slice_dims = [-1, 1], + index_vector_dim = 2, + offset_dims = [2], + start_index_map = [0, 1] + >, + indices_are_sorted = false + } : (tensor<2x4x9xi32>, tensor, tensor<3xi32>) -> tensor + func.return %res : tensor +} + +// ----- + +func.func @dynamic_gather_c7(%operand : tensor<2x4x9xi32>, %start_indices : tensor, %slice_sizes : tensor<3xi32>) -> tensor { + // expected-error@+2 {{failed to infer returned types}} + // expected-error@+1 {{collapsed dimension 17 is out of bounds for slice_sizes.size (3)}} + %res = "mhlo.dynamic_gather"(%operand, %start_indices, %slice_sizes) { + dimension_numbers = #mhlo.gather< + collapsed_slice_dims = [0, 17], + index_vector_dim = 2, + offset_dims = [2], + start_index_map = [0, 1] + >, + indices_are_sorted = false + } : (tensor<2x4x9xi32>, tensor, tensor<3xi32>) -> tensor + func.return %res : tensor +} + +// ----- + +func.func @dynamic_gather_c8(%operand : tensor, %start_indices : tensor) -> tensor { + %slize_sizes = mhlo.constant dense<[1,1,8]> : tensor<3xi32> + // expected-error@+2 {{failed to infer returned types}} + // 
expected-error@+1 {{slice_sizes collapsed dimension 2 should <= 1 but got 8}} + %res = "mhlo.dynamic_gather"(%operand, %start_indices, %slize_sizes) { + dimension_numbers = #mhlo.gather< + offset_dims = [2], + collapsed_slice_dims = [0, 2], + start_index_map = [0, 1], + index_vector_dim = 2 + >, + indices_are_sorted = false + } : (tensor, tensor, tensor<3xi32>) -> tensor + func.return %res : tensor +} + +// ----- + +func.func @dynamic_gather_c9(%operand : tensor<2x4x9xi32>, %start_indices : tensor<1x5x2xi32>, %slize_sizes : tensor<3xi32>) -> tensor<1x5x8xi32> { + // expected-error@+2 {{failed to infer returned types}} + // expected-error@+1 {{expects start_index_map to not repeat, got: [0, 0]}} + %res = "mhlo.dynamic_gather"(%operand, %start_indices, %slize_sizes) { + dimension_numbers = #mhlo.gather< + offset_dims = [2], + collapsed_slice_dims = [0, 1], + start_index_map = [0, 0], + index_vector_dim = 2 + >, + indices_are_sorted = false + } : (tensor<2x4x9xi32>, tensor<1x5x2xi32>, tensor<3xi32>) -> tensor<1x5x8xi32> + func.return %res : tensor<1x5x8xi32> +} + +// ----- + +func.func @dynamic_gather_c10(%operand : tensor<2x4x9xi32>, %start_indices : tensor<1x5x2xi32>, %slize_sizes : tensor<3xi32>) -> tensor<1x5x8xi32> { + // expected-error@+2 {{failed to infer returned types}} + // expected-error@+1 {{start_index_map[0]: -2 is out of bounds for operand rank 3}} + %res = "mhlo.dynamic_gather"(%operand, %start_indices, %slize_sizes) { + dimension_numbers = #mhlo.gather< + offset_dims = [2], + collapsed_slice_dims = [0, 1], + start_index_map = [-2, -1], + index_vector_dim = 2 + >, + slice_sizes = array, + indices_are_sorted = false + } : (tensor<2x4x9xi32>, tensor<1x5x2xi32>, tensor<3xi32>) -> tensor<1x5x8xi32> + func.return %res : tensor<1x5x8xi32> +} + +// ----- + +func.func @dynamic_gather_c10(%operand : tensor<2x4x9xi32>, %start_indices : tensor<1x5x2xi32>, %slize_sizes : tensor<3xi32>) -> tensor<1x5x8xi32> { + // expected-error@+2 {{failed to infer returned types}} + // expected-error@+1 {{start_index_map[1]: 3 is out of bounds for operand rank 3}} + %res = "mhlo.dynamic_gather"(%operand, %start_indices, %slize_sizes) { + dimension_numbers = #mhlo.gather< + offset_dims = [2], + collapsed_slice_dims = [0, 1], + start_index_map = [0, 3], + index_vector_dim = 2 + >, + slice_sizes = array, + indices_are_sorted = false + } : (tensor<2x4x9xi32>, tensor<1x5x2xi32>, tensor<3xi32>) -> tensor<1x5x8xi32> + func.return %res : tensor<1x5x8xi32> +} + +// ----- + +func.func @dynamic_gather_c11(%operand : tensor, %start_indices : tensor, %slice_sizes : tensor<2xi32>) -> tensor { + // expected-error@+2 {{failed to infer returned types}} // expected-error@+1 {{slice_sizes size (2) not equal to (implied) operand rank (3)}} %res = "mhlo.dynamic_gather"(%operand, %start_indices, %slice_sizes) { dimension_numbers = #mhlo.gather< @@ -4176,8 +4349,45 @@ func.func @dynamic_gather(%operand : tensor, %start_indices : tensor< // ----- -func.func @dynamic_gather(%operand : tensor<2x4x9xi32>, %start_indices : tensor<1x5x2xi32>, %slice_sizes : tensor<3xi32>) -> tensor<3xi32> { - // @expected-error@+2 {{'mhlo.dynamic_gather' op failed to infer returned types}} +func.func @dynamic_gather_c12(%operand : tensor, %start_indices : tensor) -> tensor { + %slice_sizes = mhlo.constant dense<[1,1,-1]> : tensor<3xi32> + // expected-error@+2 {{failed to infer returned types}} + // expected-error@+1 {{slice size (-1) is out of bounds for operand dimension (2) at index 2}} + %res = "mhlo.dynamic_gather"(%operand, %start_indices, 
%slice_sizes) { + dimension_numbers = #mhlo.gather< + offset_dims = [2], + collapsed_slice_dims = [0, 1], + start_index_map = [0, 1], + index_vector_dim = 2 + >, + indices_are_sorted = false + } : (tensor, tensor, tensor<3xi32>) -> tensor + func.return %res : tensor +} + +// ----- + +func.func @dynamic_gather_c12(%operand : tensor, %start_indices : tensor) -> tensor { + %slice_sizes = mhlo.constant dense<[1,1,8]> : tensor<3xi32> + // expected-error@+2 {{failed to infer returned types}} + // expected-error@+1 {{slice size (8) is out of bounds for operand dimension (2) at index 2}} + %res = "mhlo.dynamic_gather"(%operand, %start_indices, %slice_sizes) { + dimension_numbers = #mhlo.gather< + offset_dims = [2], + collapsed_slice_dims = [0, 1], + start_index_map = [0, 1], + index_vector_dim = 2 + >, + slice_sizes = array, + indices_are_sorted = false + } : (tensor, tensor, tensor<3xi32>) -> tensor + func.return %res : tensor +} + +// ----- + +func.func @dynamic_gather_c13(%operand : tensor<2x4x9xi32>, %start_indices : tensor<1x5x2xi32>, %slice_sizes : tensor<3xi32>) -> tensor<3xi32> { + // expected-error@+2 {{failed to infer returned types}} // expected-error@+1 {{inferred type(s) 'tensor<1x5x?xi32>' are incompatible with return type(s) of operation 'tensor<3xi32>'}} %res = "mhlo.dynamic_gather"(%operand, %start_indices, %slice_sizes) { dimension_numbers = #mhlo.gather< @@ -4193,8 +4403,8 @@ func.func @dynamic_gather(%operand : tensor<2x4x9xi32>, %start_indices : tensor< // ----- -func.func @dynamic_gather(%operand : tensor<2x4x9xi32>, %start_indices : tensor<1x5x2xi32>, %slice_sizes : tensor) -> tensor<3xi32> { - // @expected-error@+2 {{'mhlo.dynamic_gather' op failed to infer returned types}} +func.func @dynamic_gather_c13(%operand : tensor<2x4x9xi32>, %start_indices : tensor<1x5x2xi32>, %slice_sizes : tensor<3xi32>) -> tensor<3xi32> { + // expected-error@+2 {{failed to infer returned types}} // expected-error@+1 {{inferred type(s) 'tensor<1x5x?xi32>' are incompatible with return type(s) of operation 'tensor<3xi32>'}} %res = "mhlo.dynamic_gather"(%operand, %start_indices, %slice_sizes) { dimension_numbers = #mhlo.gather< @@ -4204,14 +4414,14 @@ func.func @dynamic_gather(%operand : tensor<2x4x9xi32>, %start_indices : tensor< start_index_map = [0, 1] >, indices_are_sorted = false - } : (tensor<2x4x9xi32>, tensor<1x5x2xi32>, tensor) -> tensor<3xi32> + } : (tensor<2x4x9xi32>, tensor<1x5x2xi32>, tensor<3xi32>) -> tensor<3xi32> func.return %res : tensor<3xi32> } // ----- -func.func @dynamic_gather(%operand : tensor, %start_indices : tensor, %slice_sizes : tensor<3xi32>) -> tensor<3xi32> { - // @expected-error@+2 {{'mhlo.dynamic_gather' op failed to infer returned types}} +func.func @dynamic_gather_c13(%operand : tensor, %start_indices : tensor, %slice_sizes : tensor<3xi32>) -> tensor<3xi32> { + // expected-error@+2 {{failed to infer returned types}} // expected-error@+1 {{inferred type(s) 'tensor' are incompatible with return type(s) of operation 'tensor<3xi32>'}} %res = "mhlo.dynamic_gather"(%operand, %start_indices, %slice_sizes) { dimension_numbers = #mhlo.gather< @@ -4227,8 +4437,8 @@ func.func @dynamic_gather(%operand : tensor, %start_indices : tensor< // ----- -func.func @dynamic_gather(%operand : tensor, %start_indices : tensor, %slice_sizes : tensor) -> tensor { - // @expected-error@+2 {{'mhlo.dynamic_gather' op failed to infer returned types}} +func.func @dynamic_gather_c13(%operand : tensor, %start_indices : tensor, %slice_sizes : tensor<3xi32>) -> tensor { + // expected-error@+2 {{failed 
to infer returned types}} // expected-error@+1 {{inferred type(s) 'tensor' are incompatible with return type(s) of operation 'tensor'}} %res = "mhlo.dynamic_gather"(%operand, %start_indices, %slice_sizes) { dimension_numbers = #mhlo.gather< @@ -4238,7 +4448,7 @@ func.func @dynamic_gather(%operand : tensor, %start_indices : tensor< start_index_map = [0, 1] >, indices_are_sorted = false - } : (tensor, tensor, tensor) -> tensor + } : (tensor, tensor, tensor<3xi32>) -> tensor func.return %res : tensor } @@ -5532,7 +5742,7 @@ func.func @quantized_dot_i4(%arg0: tensor<2x2x!quant.uniform>, %a // ----- // CHECK-LABEL: func @quantized_dot_general -func.func @quantized_dot_general(%arg0: tensor<2x16x32x!quant.uniform>, %arg1: tensor<2x32x32x!quant.uniform>) -> tensor<2x16x32x!quant.uniform> { +func.func @quantized_dot_general(%arg0: tensor<2x16x32x!quant.uniform>, %arg1: tensor<2x32x32x!quant.uniform>) -> tensor<2x16x32x!quant.uniform> { %0 = "mhlo.dot_general"(%arg0, %arg1) { dot_dimension_numbers = #mhlo.dot< lhs_batching_dimensions = [0], @@ -5541,7 +5751,7 @@ func.func @quantized_dot_general(%arg0: tensor<2x16x32x!quant.uniform, precision_config = [#mhlo, #mhlo]} - : (tensor<2x16x32x!quant.uniform>, tensor<2x32x32x!quant.uniform>) -> tensor<2x16x32x!quant.uniform> + : (tensor<2x16x32x!quant.uniform>, tensor<2x32x32x!quant.uniform>) -> tensor<2x16x32x!quant.uniform> func.return %0 : tensor<2x16x32x!quant.uniform> } @@ -5779,19 +5989,6 @@ func.func @pad_dynamic(%arg0: tensor) -> tensor // ----- -func.func @pad_i2(%arg0: tensor<1x2x3xf16>, %arg1: tensor<2xf16>) -> tensor<2x4x7xf16> { - // @expected-error@+2 {{'mhlo.pad' op failed to infer returned types}} - // expected-error@+1 {{padding value type should be a rank-0 tensor, is rank 1}} - %0 = "mhlo.pad"(%arg0, %arg1) { - edge_padding_low = dense<[0, 1, 2]> : tensor<3xi64>, - edge_padding_high = dense<[1, 1, 0]> : tensor<3xi64>, - interior_padding = dense<[0, 0, 1]> : tensor<3xi64> - } : (tensor<1x2x3xf16>, tensor<2xf16>) -> tensor<2x4x7xf16> - func.return %0 : tensor<2x4x7xf16> -} - -// ----- - func.func @pad_i3(%arg0: tensor<1x2x3xf16>, %arg1: tensor) -> tensor<2x4x7xf16> { // @expected-error@+2 {{'mhlo.pad' op failed to infer returned types}} // expected-error@+1 {{edge_padding_low has rank 0 instead of required rank 1}} diff --git a/third_party/xla/xla/mlir_hlo/tests/Dialect/mhlo/stablehlo-legalize-to-hlo.mlir b/third_party/xla/xla/mlir_hlo/tests/Dialect/mhlo/stablehlo-legalize-to-hlo.mlir index 963e2a46459e44..7107b7d615e7b8 100644 --- a/third_party/xla/xla/mlir_hlo/tests/Dialect/mhlo/stablehlo-legalize-to-hlo.mlir +++ b/third_party/xla/xla/mlir_hlo/tests/Dialect/mhlo/stablehlo-legalize-to-hlo.mlir @@ -806,21 +806,19 @@ func.func @op_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xind } // CHECK-LABEL: "op_dynamic_conv" -func.func @op_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> { +func.func @op_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<2x2xi32>) -> tensor<1x?x?x16xf32> { // CHECK: "mhlo.dynamic_conv"([[ARG0:%arg[0-9]+]], [[ARG1:%arg[0-9]+]], [[ARG2:%arg[0-9]+]]) <{ // CHECK-SAME: batch_group_count = 1 : i64, // CHECK-SAME: dimension_numbers = #mhlo.conv<[b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f]>, // CHECK-SAME: feature_group_count = 1 : i64, // CHECK-SAME: lhs_dilation = dense<1> : tensor<2xi64>, - // CHECK-SAME: padding = dense<1> : tensor<2x2xi64>, // CHECK-SAME: precision_config = [#mhlo, #mhlo], // CHECK-SAME: 
rhs_dilation = dense<1> : tensor<2xi64>, // CHECK-SAME: window_reversal = dense : tensor<2xi1>, // CHECK-SAME: window_strides = dense<1> : tensor<2xi64> - // CHECK-SAME: }> : (tensor<1x8x8x207xf32>, tensor<3x3x207x16xf32>, tensor<4xi32>) -> tensor<1x?x?x16xf32> + // CHECK-SAME: }> : (tensor<1x8x8x207xf32>, tensor<3x3x207x16xf32>, tensor<2x2xi32>) -> tensor<1x?x?x16xf32> %0 = "stablehlo.dynamic_conv"(%arg0, %arg1, %arg2) { window_strides = array, - padding = dense<1> : tensor<2x2xi64>, lhs_dilation = array, rhs_dilation = array, window_reversal = array, @@ -828,7 +826,7 @@ func.func @op_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x1 feature_group_count = 1 : i64, batch_group_count = 1 : i64, precision_config = [#stablehlo, #stablehlo] - } : (tensor<1x8x8x207xf32>, tensor<3x3x207x16xf32>, tensor<4xi32>) -> tensor<1x?x?x16xf32> + } : (tensor<1x8x8x207xf32>, tensor<3x3x207x16xf32>, tensor<2x2xi32>) -> tensor<1x?x?x16xf32> func.return %0 : tensor<1x?x?x16xf32> } @@ -1579,17 +1577,6 @@ func.func @op_torch_index_select(%arg0: tensor<5x1x5xf32>, %arg1: tensor<2xi32>) func.return %0 : tensor<2x1x5xf32> } -// CHECK-LABEL: "op_trace" -func.func @op_trace(%arg0: tensor) { - // CHECK: "mhlo.trace"([[ARG0:%arg[0-9]+]]) <{ - // CHECK-SAME: tag = "foo" - // CHECK-SAME: }> : (tensor) -> () - "stablehlo.trace"(%arg0) { - tag = "foo" - } : (tensor) -> () - func.return -} - // CHECK-LABEL: "op_transpose" func.func @op_transpose(%arg0: tensor<16x8xf32>) -> tensor<8x16xf32> { // CHECK: "mhlo.transpose"([[ARG0:%arg[0-9]+]]) <{ From 86eff97b8c84bacca4b8c02b998cb6c4b2b6d9ef Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 May 2024 10:22:16 -0700 Subject: [PATCH 119/478] Support converting strings to unsigned integer types in `tf.strings.to_number`. PiperOrigin-RevId: 633628312 --- tensorflow/core/kernels/string_to_number_op.cc | 4 ++++ tensorflow/core/ops/parsing_ops.cc | 2 +- tensorflow/python/ops/string_ops.py | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/string_to_number_op.cc b/tensorflow/core/kernels/string_to_number_op.cc index d0ee2a96e4572a..7d1553874d9a7a 100644 --- a/tensorflow/core/kernels/string_to_number_op.cc +++ b/tensorflow/core/kernels/string_to_number_op.cc @@ -16,6 +16,8 @@ limitations under the License. // See docs in ../ops/parse_ops.cc. #include + +#include #include #include "tensorflow/core/framework/kernel_def_builder.h" @@ -68,6 +70,8 @@ REGISTER(float); REGISTER(double); REGISTER(int32); REGISTER(int64_t); +REGISTER(uint32_t); +REGISTER(uint64_t); #undef REGISTER } // namespace tensorflow diff --git a/tensorflow/core/ops/parsing_ops.cc b/tensorflow/core/ops/parsing_ops.cc index d765455b6f1864..a3e801a87099c7 100644 --- a/tensorflow/core/ops/parsing_ops.cc +++ b/tensorflow/core/ops/parsing_ops.cc @@ -498,7 +498,7 @@ REGISTER_OP("DecodeCSV") REGISTER_OP("StringToNumber") .Input("string_tensor: string") .Output("output: out_type") - .Attr("out_type: {float, double, int32, int64} = DT_FLOAT") + .Attr("out_type: {float, double, int32, int64, uint32, uint64} = DT_FLOAT") .SetShapeFn(shape_inference::UnchangedShape); } // namespace tensorflow diff --git a/tensorflow/python/ops/string_ops.py b/tensorflow/python/ops/string_ops.py index 2e3bb68e0c20f2..b6c0e46c5f1028 100644 --- a/tensorflow/python/ops/string_ops.py +++ b/tensorflow/python/ops/string_ops.py @@ -478,7 +478,7 @@ def string_to_number(input, out_type=dtypes.float32, name=None): Args: input: A `Tensor` of type `string`. 
out_type: An optional `tf.DType` from: `tf.float32, tf.float64, tf.int32, - tf.int64`. Defaults to `tf.float32`. + tf.int64, tf.uint32, tf.uint64`. Defaults to `tf.float32`. The numeric type to interpret each string in `string_tensor` as. name: A name for the operation (optional). From 7fb5f26eaa627d5e280e3c52ad1d3464764e716d Mon Sep 17 00:00:00 2001 From: Kyle Lucke Date: Tue, 14 May 2024 10:25:34 -0700 Subject: [PATCH 120/478] Add a typedef to allow users to easily create the correct Factory type to create a TypedKernel. PiperOrigin-RevId: 633629571 --- third_party/xla/xla/stream_executor/kernel.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/third_party/xla/xla/stream_executor/kernel.h b/third_party/xla/xla/stream_executor/kernel.h index f90ad3fc50fbf5..677e8db1a37b35 100644 --- a/third_party/xla/xla/stream_executor/kernel.h +++ b/third_party/xla/xla/stream_executor/kernel.h @@ -338,6 +338,9 @@ class TypedKernel { operator bool() const { return static_cast(kernel_); } // NOLINT + // Type of factory used to create a TypedKernel. + using FactoryType = TypedKernelFactory; + private: friend class TypedKernelFactory; explicit TypedKernel(std::unique_ptr kernel) From f6cc0f900c36d8f4634b07202cd6d46a2e4ffaac Mon Sep 17 00:00:00 2001 From: Vlad Sytchenko Date: Tue, 14 May 2024 10:33:13 -0700 Subject: [PATCH 121/478] [XLA] Account for private stack allocations in HloRematerialization Buffer assignment allocates a single stack for all asynchronous computations, which persists for the entire duration of the program. We need to account for this by adjusting the memory limit. PiperOrigin-RevId: 633632062 --- .../xla/xla/service/gpu/gpu_compiler.cc | 3 +- .../xla/xla/service/hlo_rematerialization.cc | 74 +++++++++++++-- .../xla/xla/service/hlo_rematerialization.h | 25 +++-- .../xla/service/hlo_rematerialization_test.cc | 91 ++++++++++++++++++- 4 files changed, 169 insertions(+), 24 deletions(-) diff --git a/third_party/xla/xla/service/gpu/gpu_compiler.cc b/third_party/xla/xla/service/gpu/gpu_compiler.cc index 55edb1e4b00336..6d293297d2ffad 100644 --- a/third_party/xla/xla/service/gpu/gpu_compiler.cc +++ b/third_party/xla/xla/service/gpu/gpu_compiler.cc @@ -2186,8 +2186,7 @@ absl::Status GpuCompiler::RunPostSchedulingPipelines( // Assume 75% of the total device memory is available for XLA. /*memory_limit_bytes=*/scheduler_mem_limit, /*block_size_limit=*/1, /*block_rematerialization_factor=*/1, - /*min_remat_size=*/0, /*compact_shape_function=*/nullptr, - /*host_memory_offload_config=*/std::nullopt); + /*min_remat_size=*/0, /*compact_shape_function=*/nullptr); HloRematerialization::RematerializationSizes sizes; pipeline.AddPass(options, sizes); pipeline.AddPass(); diff --git a/third_party/xla/xla/service/hlo_rematerialization.cc b/third_party/xla/xla/service/hlo_rematerialization.cc index 0dbacab5e7bbf9..a948cde80aa7d2 100644 --- a/third_party/xla/xla/service/hlo_rematerialization.cc +++ b/third_party/xla/xla/service/hlo_rematerialization.cc @@ -22,8 +22,8 @@ limitations under the License. #include #include #include -#include #include +#include #include #include #include @@ -56,7 +56,6 @@ limitations under the License. #include "xla/shape_util.h" #include "xla/status.h" #include "xla/status_macros.h" -#include "xla/statusor.h" #include "xla/util.h" #include "tsl/platform/errors.h" @@ -2761,7 +2760,9 @@ absl::StatusOr HloRematerialization::RematerializeComputation( // in the callee computations. 
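      // Descriptive note: the recursion below is limited to callees that
      // execute on one of the threads being rematerialized; asynchronous
      // callees on other execution threads are budgeted separately via
      // Options::async_computation_parallelism in Run().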
for (HloComputation* called_computation : callsite->called_computations()) { - if (!ContainsKey(rematerialized_computations_, called_computation)) { + if (!ContainsKey(rematerialized_computations_, called_computation) && + HloInstruction::IsThreadIncluded( + called_computation->execution_thread(), execution_threads)) { // Memory limit for the subcomputation is the memory limit less the // amount of memory used at this point in the computation. int64_t subcomputation_memory_limit_bytes = std::max( @@ -2862,15 +2863,67 @@ absl::StatusOr HloRematerialization::Run( module_output_size += options_.hlo_cost_analysis.GetShapeSize(subshape); }); - const int64_t adjusted_memory_limit_bytes = + int64_t adjusted_memory_limit_bytes = std::max(0, options_.memory_limit_bytes - module_output_size); VLOG(1) << "Adjusted memory limit accounting for output (" << HumanReadableNumBytes(module_output_size) << "): " << HumanReadableNumBytes(adjusted_memory_limit_bytes); + call_graph_ = CallGraph::Build(module); + + // Buffer assignment allocates a single stack for all asynchronous + // computations of the same thread, which persists for the entire duration of + // the program. We need to account for this by adjusting the memory limit. + int64_t total_async_peak_memory = 0; + if (!options_.async_computation_parallelism.empty()) { + // We cannot compute memory usage for both the main and asynchronous threads + // at the same time, as that will cause the asynchronous callee usage to be + // added to the main thread callers usage. The callee's memory is + // preallocated, so the caller doesn't pay for it. + absl::flat_hash_set async_threads; + for (const auto& [computation, _] : + options_.async_computation_parallelism) { + async_threads.insert(computation->execution_thread()); + } + TF_RETURN_IF_ERROR(call_graph_->VisitNodes( + [this, module, &async_threads](const CallGraphNode& node) -> Status { + auto callee_thread = node.computation()->execution_thread(); + if (node.context() == CallContext::kControlFlow && + HloInstruction::IsThreadIncluded(callee_thread, async_threads)) { + TF_ASSIGN_OR_RETURN(computation_peak_memory_[node.computation()], + ComputePeakMemory(node.computation(), + module->schedule().sequence( + node.computation()), + {callee_thread})); + } + return OkStatus(); + }, + /*visit_unreachable_nodes=*/false)); + + int64_t async_peak_memory = 0; + // Only consider asynchronous computations invoked from the main thread. + for (const auto [entry_computation, parallel_threads] : + options_.async_computation_parallelism) { + const int64_t peak_memory = + computation_peak_memory_.at(entry_computation); + // Adjust memory usage for parallel execution of the same computation + // on different devices. + const int64_t parallel_peak_memory = peak_memory * parallel_threads; + async_peak_memory = std::max(async_peak_memory, parallel_peak_memory); + } + adjusted_memory_limit_bytes = + std::max(0, adjusted_memory_limit_bytes - async_peak_memory); + total_async_peak_memory += async_peak_memory; + VLOG(1) << "Adjusted memory limit accounting for async computations (" + << HumanReadableNumBytes(async_peak_memory) + << "): " << HumanReadableNumBytes(adjusted_memory_limit_bytes); + + // Reset back to a clean state, since we don't expect to utilize the + // async computation memory usage anymore. + computation_peak_memory_.clear(); + } // Compute peak memory usage of all computations in the module called in a // sequential context. 
- call_graph_ = CallGraph::Build(module); TF_RETURN_IF_ERROR(call_graph_->VisitNodes( [this, module, &execution_threads](const CallGraphNode& node) -> Status { if (node.context() == CallContext::kControlFlow && @@ -2887,12 +2940,13 @@ absl::StatusOr HloRematerialization::Run( /*visit_unreachable_nodes=*/false)); // The peak memory usage of the module equals the peak memory use of the entry - // computation plus the output size of the computation. This is because the - // peak memory for a computation does not include the output as this is - // typically accounted for in the caller. + // computation plus the output size of the computation plus memory use of + // asynchronous computations. This is because the peak memory for a + // computation does not include the output as this is typically accounted for + // in the caller. const int64_t before_peak_memory = computation_peak_memory_.at(module->entry_computation()) + - module_output_size; + module_output_size + total_async_peak_memory; VLOG(1) << "Peak memory usage of module (before): " << HumanReadableNumBytes(before_peak_memory); @@ -2928,7 +2982,7 @@ absl::StatusOr HloRematerialization::Run( << net_instructions_added_ << " net instructions added"; const int64_t current_peak_memory = computation_peak_memory_.at(module->entry_computation()) + - module_output_size; + module_output_size + total_async_peak_memory; VLOG(1) << "Peak memory usage of module now " << HumanReadableNumBytes(current_peak_memory) << " (" << current_peak_memory << " bytes), was " diff --git a/third_party/xla/xla/service/hlo_rematerialization.h b/third_party/xla/xla/service/hlo_rematerialization.h index 4fae1cc2ddbbd0..e5bb5cc8aa8d2e 100644 --- a/third_party/xla/xla/service/hlo_rematerialization.h +++ b/third_party/xla/xla/service/hlo_rematerialization.h @@ -90,13 +90,15 @@ class HloRematerialization : public HloModulePass { static Shape DefaultCompactShapeFunction(const Shape& shape) { return shape; } struct Options { - explicit Options( - HloCostAnalysis& hlo_cost_analysis, - const RematerializationModeConfig& remat_mode_config, - int64_t memory_limit_bytes, int block_size_limit, - int block_rematerialization_factor, int64_t min_remat_size, - CompactShapeFunction compact_shape_function, - std::optional host_memory_offload_config) + explicit Options(HloCostAnalysis& hlo_cost_analysis, + const RematerializationModeConfig& remat_mode_config, + int64_t memory_limit_bytes, int block_size_limit, + int block_rematerialization_factor, int64_t min_remat_size, + CompactShapeFunction compact_shape_function, + std::optional + host_memory_offload_config = std::nullopt, + absl::flat_hash_map + async_computation_parallelism = {}) : hlo_cost_analysis(hlo_cost_analysis), remat_mode_config(remat_mode_config), memory_limit_bytes(memory_limit_bytes), @@ -106,7 +108,8 @@ class HloRematerialization : public HloModulePass { compact_shape_function(compact_shape_function == nullptr ? DefaultCompactShapeFunction : std::move(compact_shape_function)), - host_memory_offload_config(host_memory_offload_config) {} + host_memory_offload_config(host_memory_offload_config), + async_computation_parallelism(async_computation_parallelism) {} // The cost model used for decisions during rematerialization for host // memory offload. It is also used for getting Shape size. @@ -133,7 +136,7 @@ class HloRematerialization : public HloModulePass { // return for potentially reduced memory consumption. 
int block_rematerialization_factor; - // The minimim size, in bytes, of a tensor to be considered for + // The minimum size, in bytes, of a tensor to be considered for // rematerialization. All tensors smaller than this size will be skipped // over. int64_t min_remat_size; @@ -143,6 +146,10 @@ class HloRematerialization : public HloModulePass { CompactShapeFunction compact_shape_function; std::optional host_memory_offload_config; + + // Collection of async entry computations and their number of parallel + // invocations. + absl::flat_hash_map async_computation_parallelism; }; explicit HloRematerialization(Options options, RematerializationSizes& sizes) diff --git a/third_party/xla/xla/service/hlo_rematerialization_test.cc b/third_party/xla/xla/service/hlo_rematerialization_test.cc index 780c881aa94bf1..b30cf8293e48e9 100644 --- a/third_party/xla/xla/service/hlo_rematerialization_test.cc +++ b/third_party/xla/xla/service/hlo_rematerialization_test.cc @@ -47,6 +47,88 @@ namespace op = xla::testing::opcode_matchers; using ::testing::_; +class AsyncRematerializationTest : public RematerializationTestBase { + protected: + absl::StatusOr RunHloRematerialization( + int64_t memory_limit_bytes, HloModule* module, + const absl::flat_hash_map& + async_computation_parallelism, + int64_t min_remat_size = 0) { + TF_EXPECT_OK(verifier().Run(module).status()); + if (!module->has_schedule()) { + HloMemoryScheduler scheduler( + [](const BufferValue& buffer) { return ByteSizeOf(buffer.shape()); }, + ComputationSchedulerToModuleScheduler(DefaultMemoryScheduler)); + TF_EXPECT_OK(scheduler.Run(module).status()); + } + HloRematerialization::RematerializationModeConfig config( + /*recompute=*/true, /*compress=*/true, /*host_offload=*/false); + auto shape_size_func = [](const Shape& shape) { return ByteSizeOf(shape); }; + HloCostAnalysis cost_analysis(shape_size_func); + HloRematerialization::Options options( + cost_analysis, config, memory_limit_bytes, + /*block_size_limit=*/1, /*block_rematerialization_factor=*/1, + min_remat_size, /*compact_shape_function=*/nullptr, + /*host_memory_offload_config=*/std::nullopt, + /*async_computation_parallelism=*/async_computation_parallelism); + HloRematerialization::RematerializationSizes sizes; + HloRematerialization remat(options, sizes); + return remat.Run(module, {HloInstruction::kMainExecutionThread}); + } + + static constexpr int64_t kNumParallelThreads = 16; +}; + +TEST_F(AsyncRematerializationTest, AsyncComputation) { + constexpr std::string_view hlo = R"( +HloModule async, is_scheduled=true + +%offload_computation { + %param = f32[1]{0} parameter(0) + %reshape = f32[] reshape(f32[1]{0} %param) + %broadcast = f32[1024]{0} broadcast(f32[] %reshape), dimensions={} + %negate = f32[1024]{0} negate(f32[1024]{0} %broadcast) + %concatenate = f32[2048]{0} concatenate(f32[1024]{0} %negate, f32[1024]{0} %negate), dimensions={0} + %slice = f32[1]{0} slice(f32[2048]{0} %concatenate), slice={[0:1]} + %concatenate.1 = f32[1025]{0} concatenate(f32[1024]{0} %broadcast, f32[1]{0} %slice), dimensions={0} + ROOT %slice.1 = f32[1]{0} slice(f32[1025]{0} %concatenate.1), slice={[0:1]} +} + +%main_computation { + %param = f32[1]{0} parameter(0) + %reshape = f32[] reshape(f32[1]{0} %param) + %broadcast = f32[1024]{0} broadcast(f32[] %reshape), dimensions={} + %negate = f32[1024]{0} negate(f32[1024]{0} %broadcast) + %concatenate = f32[2048]{0} concatenate(f32[1024]{0} %negate, f32[1024]{0} %negate), dimensions={0} + %slice = f32[1]{0} slice(f32[2048]{0} %concatenate), slice={[0:1]} + 
%concatenate.1 = f32[1025]{0} concatenate(f32[1024]{0} %broadcast, f32[1]{0} %slice), dimensions={0} + ROOT %slice.1 = f32[1]{0} slice(f32[1025]{0} %concatenate.1), slice={[0:1]} +} + +ENTRY %main { + %param = f32[1]{0} parameter(0) + %call-start = ((f32[1]{0}), f32[1]{0}, s32[]) call-start(f32[1]{0} %param), to_apply=%offload_computation, async_execution_thread="offload" + %call-done = f32[1]{0} call-done(((f32[1]{0}), f32[1]{0}, s32[]) %call-start) + ROOT %call = f32[1]{0} call(f32[1]{0} %call-done), to_apply=%main_computation +} +)"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, ParseAndReturnVerifiedModule(hlo)); + + HloInstruction* call_start = FindInstruction(module.get(), "call-start"); + // Computation requires 16KB without rematerialization, but uses only 12KB + // with rematerialization so pick a memory limit between these values (14KB). + // Asynchronous computation will run on 16 devices and we do not rematerialize + // it, so it will reserve 16 * 16Kb from the memory limit. + TF_ASSERT_OK_AND_ASSIGN( + bool changed, + RunHloRematerialization( + /*memory_limit_bytes=*/kNumParallelThreads * 16 * 1024 + 14 * 1024, + module.get(), + {{call_start->async_wrapped_computation(), kNumParallelThreads}})); + EXPECT_TRUE(changed); +} + // Inherits methods to create rematerializable computations. See // RematerializationTestBase for more. class RecomputeAndCompressHloRematerializationTest @@ -80,7 +162,8 @@ class RecomputeAndCompressHloRematerializationTest cost_analysis, config, memory_limit_bytes, /*block_size_limit=*/1, /*block_rematerialization_factor=*/1, min_remat_size, /*compact_shape_function=*/nullptr, - /*host_memory_offload_config=*/std::nullopt); + /*host_memory_offload_config=*/std::nullopt, + /*async_threads=*/{}); HloRematerialization::RematerializationSizes sizes; HloRematerialization remat(options, sizes); absl::StatusOr result = remat.Run(module); @@ -1105,7 +1188,8 @@ class CompressingRematerializationTest : public RematerializationTestBase { cost_analysis, config, memory_limit_bytes, /*block_size_limit=*/1, /*block_rematerialization_factor=*/1, min_remat_size, ChooseCompactLayoutForShape, - /*host_memory_offload_config=*/std::nullopt); + /*host_memory_offload_config=*/std::nullopt, + /*async_threads=*/{}); HloRematerialization::RematerializationSizes sizes; HloRematerialization remat(options, sizes); return remat.Run(module); @@ -1314,7 +1398,8 @@ class OffloadingRematerializationTest : public RematerializationTestBase { cost_analysis, config, memory_limit_bytes, /*block_size_limit=*/1, /*block_rematerialization_factor=*/1, min_remat_size, /*compact_shape_function=*/nullptr, - host_memory_offload_config); + host_memory_offload_config, + /*async_threads=*/{}); HloRematerialization::RematerializationSizes sizes; HloRematerialization remat(options, sizes); return remat.Run(module); From b10ad05b3b2f78d1c0c96ec7194f58bbb0be54e9 Mon Sep 17 00:00:00 2001 From: Goran Flegar Date: Tue, 14 May 2024 10:52:12 -0700 Subject: [PATCH 122/478] Add a flag to override the GEMM autotuner with a specified textproto configuration. 
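As a sketch (field names follow AutotuneResult::TritonGemmKey; the values here
are placeholders, not a recommended configuration), an override might look
like:

  --xla_gpu_override_gemm_autotuner='block_m:64 block_n:64 block_k:32 split_k:1 num_stages:1 num_warps:4'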
This makes it easier to debug issues with Triton, since [persisted autotuning](https://openxla.org/xla/persisted_autotuning) does not offer a workflow that would be efficient enough: - `--xla_gpu_dump_autotune_results_to=` doesn't work if the autotuner crashes, so we would need to construct the cache manually from the logs; furthermore, the cache only applies to a specific HW, so we can't use it to override the autotuner on a different GPU - `--xla_gpu_load_autotune_results_from=` reads a file so we need to copy the file into the source tree, and modify the BUILD file to include it into the build; with this new flag, we can just copy the flag from the VLOG without having to fiddle around with any files PiperOrigin-RevId: 633638219 --- third_party/xla/xla/debug_options_flags.cc | 8 ++++ third_party/xla/xla/service/gpu/BUILD | 1 + .../xla/service/gpu/gemm_fusion_autotuner.cc | 43 +++++++++++++++++-- third_party/xla/xla/xla.proto | 4 +- 4 files changed, 51 insertions(+), 5 deletions(-) diff --git a/third_party/xla/xla/debug_options_flags.cc b/third_party/xla/xla/debug_options_flags.cc index 831a26b2c96793..aa95aa8a3f6120 100644 --- a/third_party/xla/xla/debug_options_flags.cc +++ b/third_party/xla/xla/debug_options_flags.cc @@ -1472,6 +1472,14 @@ void MakeDebugOptionsFlags(std::vector* flag_list, "Dumps autotuned GEMM fusions to the directory specified by " "xla_dump_to or stdout. Each fusion is dumped only once, as an optimized " "HLO.")); + flag_list->push_back(tsl::Flag( + "xla_gpu_override_gemm_autotuner", + string_setter_for(&DebugOptions::set_xla_gpu_override_gemm_autotuner), + debug_options->xla_gpu_override_gemm_autotuner(), + "Overrides the GEMM autotuner to use the specified " + "(AutotuneResult::TritonGemmKey) textproto configuration for all Triton " + "GEMM fusions. (You can get such textprotos from the debug logs of the " + "GEMM autotuner.) ")); flag_list->push_back(tsl::Flag( "xla_gpu_copy_insertion_use_region_analysis", bool_setter_for( diff --git a/third_party/xla/xla/service/gpu/BUILD b/third_party/xla/xla/service/gpu/BUILD index 91776ad61dee01..8b841bcf9be6ea 100644 --- a/third_party/xla/xla/service/gpu/BUILD +++ b/third_party/xla/xla/service/gpu/BUILD @@ -758,6 +758,7 @@ cc_library( "@local_tsl//tsl/platform:blocking_counter", "@local_tsl//tsl/platform:env", "@local_tsl//tsl/platform:errors", + "@local_tsl//tsl/platform:protobuf", "@local_tsl//tsl/platform:status", "@local_tsl//tsl/platform:statusor", "@local_tsl//tsl/profiler/lib:scoped_annotation", diff --git a/third_party/xla/xla/service/gpu/gemm_fusion_autotuner.cc b/third_party/xla/xla/service/gpu/gemm_fusion_autotuner.cc index d5b34fb877add3..a2a1cc1c063493 100644 --- a/third_party/xla/xla/service/gpu/gemm_fusion_autotuner.cc +++ b/third_party/xla/xla/service/gpu/gemm_fusion_autotuner.cc @@ -90,6 +90,7 @@ limitations under the License. 
#include "tsl/platform/blocking_counter.h" #include "tsl/platform/errors.h" #include "tsl/platform/path.h" +#include "tsl/platform/protobuf.h" #include "tsl/platform/status.h" #include "tsl/platform/statusor.h" #include "tsl/platform/threadpool.h" @@ -501,6 +502,17 @@ absl::Status DumpAutotunedFusion(const AutotuneConfig& autotune_config, return absl::OkStatus(); } +std::string Serialize(const Config& config) { + if (auto triton_config = std::get_if(&config)) { + tsl::protobuf::TextFormat::Printer printer; + printer.SetSingleLineMode(true); + std::string result; + printer.PrintToString(triton_config->ToProto(), &result); + return result; + } + return GemmFusionAutotunerImpl::ToString(config); +} + } // anonymous namespace // Methods required for sorting the configs. @@ -757,12 +769,18 @@ GemmFusionAutotunerImpl::CompileAll( const HloFusionInstruction* fusion = key_value.first; const std::vector& gemm_config_set = key_value.second; - VLOG(10) << "Compiling the fusion: " << fusion->name(); + VLOG(10) << "Compiling fusion: " << fusion->name(); VLOG(10) << "Dumping fusion computation: " << fusion->called_computation()->ToString(); for (const Config& config : gemm_config_set) { thread_pool_->Schedule([&, fusion] { - VLOG(10) << "Trying configuration: " << ToString(config); + VLOG(10) << "Trying configuration forceable through: " + "--xla_gpu_override_gemm_autotuner='" + << Serialize(config) << "'"; + VLOG(10) << "WARNING: you are running in multithreaded-mode, the " + "last configuration printed out might not be the one " + "causing issues! Use " + "--xla_gpu_force_compilation_parallelism=1 to fix."; absl::StatusOr has_executable = compile(fusion, config, gemm_config_set.size() > 1); TF_CHECK_OK(has_executable.status()) @@ -790,11 +808,13 @@ GemmFusionAutotunerImpl::CompileAll( const HloFusionInstruction* fusion = key_value.first; const auto& gemm_config_set = key_value.second; - VLOG(10) << "Compiling the fusion: " << fusion->name(); + VLOG(10) << "Compiling fusion: " << fusion->name(); VLOG(10) << "Dumping fusion computation: " << fusion->called_computation()->ToString(); for (const Config& config : gemm_config_set) { - VLOG(10) << "Trying configuration: " << ToString(config); + VLOG(10) << "Trying configuration forceable through: " + "--xla_gpu_override_gemm_autotuner='" + << Serialize(config) << "'"; TF_ASSIGN_OR_RETURN( bool has_executable, compile(fusion, config, gemm_config_set.size() > 1)); @@ -1123,6 +1143,21 @@ absl::StatusOr GemmFusionAutotuner::Run( tsl::proto_utils::ToDurationProto(absl::ZeroDuration()); AutotunerUtil::AddResult(key, res); } + } else if (!debug_options.xla_gpu_override_gemm_autotuner().empty()) { + // TODO(gflegar): support overriding with non-Triton configs (cuBLAS, cuDNN) + AutotuneResult::TritonGemmKey gemm_key; + CHECK(tsl::protobuf::TextFormat::ParseFromString( + debug_options.xla_gpu_override_gemm_autotuner(), &gemm_key)); + VLOG(1) << "Overriding GEMM autotuner with the following config: " + << gemm_key.DebugString(); + for (const auto& [fusion, unused] : gemm_config_sets) { + const AutotuneCacheKey key = AutotunerUtil::GetKey(fusion, config_); + AutotuneResult res; + *res.mutable_triton() = gemm_key; + *res.mutable_run_time() = + tsl::proto_utils::ToDurationProto(absl::ZeroDuration()); + AutotunerUtil::AddResult(key, res); + } } else if (!config_.IsDeviceless()) { TF_ASSIGN_OR_RETURN(std::optional opt_compile_util, AutotunerCompileUtil::Create(config_, debug_options)); diff --git a/third_party/xla/xla/xla.proto b/third_party/xla/xla/xla.proto index 
b5044afe4d6292..56ddedc2d6416b 100644 --- a/third_party/xla/xla/xla.proto +++ b/third_party/xla/xla/xla.proto @@ -633,6 +633,8 @@ message DebugOptions { bool xla_gpu_dump_autotuned_gemm_fusions = 232; + string xla_gpu_override_gemm_autotuner = 295; + bool xla_gpu_copy_insertion_use_region_analysis = 236; // If true, each fusion instruction will have a cost model runtime estimate in @@ -767,7 +769,7 @@ message DebugOptions { // Base length to rewrite the reduce window to, no rewrite if set to 0. int64 xla_reduce_window_rewrite_base_length = 293; - // Next id: 295 + // Next id: 296 // Extra options to pass to the compilation backend (e.g. LLVM); specific // interpretation of these values is left to the backend. From d8d9f2e9ca26e531169f9a55012d66f90a81c01a Mon Sep 17 00:00:00 2001 From: Arturo Schmidt Date: Tue, 14 May 2024 11:15:31 -0700 Subject: [PATCH 123/478] Remove unused translate_tf_dialect. PiperOrigin-RevId: 633647120 --- tensorflow/compiler/mlir/BUILD | 1 - tensorflow/compiler/mlir/tensorflow/BUILD | 1 - .../mlir2graphdef/simple_tf_dialect_op.mlir | 32 -------- .../compiler/mlir/tensorflow/translate/BUILD | 16 ---- .../translate/translate_tf_dialect_op.cc | 75 ------------------- 5 files changed, 125 deletions(-) delete mode 100644 tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/simple_tf_dialect_op.mlir delete mode 100644 tensorflow/compiler/mlir/tensorflow/translate/translate_tf_dialect_op.cc diff --git a/tensorflow/compiler/mlir/BUILD b/tensorflow/compiler/mlir/BUILD index 810e4277e8c58f..46d5e7e9fb9005 100644 --- a/tensorflow/compiler/mlir/BUILD +++ b/tensorflow/compiler/mlir/BUILD @@ -226,7 +226,6 @@ tf_cc_binary( "//tensorflow/compiler/mlir/tensorflow:translate_cl_options", "//tensorflow/compiler/mlir/tensorflow:translate_lib", "//tensorflow/compiler/mlir/tensorflow:translate_registration", - "//tensorflow/compiler/mlir/tensorflow:translate_tf_dialect_op", "//tensorflow/core:lib", "//tensorflow/core:tensorflow", "@com_google_absl//absl/strings", diff --git a/tensorflow/compiler/mlir/tensorflow/BUILD b/tensorflow/compiler/mlir/tensorflow/BUILD index 0be47767d4cabe..85f8644fc2d88f 100644 --- a/tensorflow/compiler/mlir/tensorflow/BUILD +++ b/tensorflow/compiler/mlir/tensorflow/BUILD @@ -1664,7 +1664,6 @@ aliased_targets = [ "export_graphdef", "import_model", "export_tf_dialect_op", - "translate_tf_dialect_op", "mlir_roundtrip_flags", "mlir_import_options", "translate_lib", diff --git a/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/simple_tf_dialect_op.mlir b/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/simple_tf_dialect_op.mlir deleted file mode 100644 index 780406e0c16127..00000000000000 --- a/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/simple_tf_dialect_op.mlir +++ /dev/null @@ -1,32 +0,0 @@ -// RUN: tf-mlir-translate -test-only-mlir-to-tf-nodedef %s -o - | FileCheck %s - -func.func @main() { -^bb0: - // CHECK: name: "node_name" - // CHECK-NEXT: op: "Const" - // CHECK-NEXT: attr { - // CHECK: key: "dtype" - // CHECK-NEXT: value { - // CHECK-NEXT: type: DT_INT32 - // CHECK-NEXT: } - // CHECK-NEXT: } - // CHECK-NEXT: attr { - // CHECK-NEXT: key: "value" - // CHECK-NEXT: value { - // CHECK-NEXT: tensor { - // CHECK-NEXT: dtype: DT_INT32 - // CHECK-NEXT: tensor_shape { - // CHECK-NEXT: dim { - // CHECK-NEXT: size: 2 - // CHECK-NEXT: } - // CHECK-NEXT: } - // CHECK-NEXT: tensor_content: "\200\000\000\000\200\000\000\000" - // CHECK: experimental_debug_info { - // CHECK-NEXT: original_node_names: "n1" - // CHECK-NEXT: original_func_names: 
"f1" - // CHECK-NEXT: } - %0 = "tf.Const"() {value = #tf_type : tensor<2xi32>} : () -> (tensor<2xi32>) loc(fused[callsite("n1@f1" at callsite("node_name" at "file_loc"))]) - func.return -} - - diff --git a/tensorflow/compiler/mlir/tensorflow/translate/BUILD b/tensorflow/compiler/mlir/tensorflow/translate/BUILD index 620e4d7783609e..f0280340dddf62 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/BUILD +++ b/tensorflow/compiler/mlir/tensorflow/translate/BUILD @@ -112,22 +112,6 @@ cc_library( ], ) -cc_library( - name = "translate_tf_dialect_op", - srcs = ["translate_tf_dialect_op.cc"], - deps = [ - ":export_tf_dialect_op", - "//tensorflow/compiler/mlir/tensorflow", - "@llvm-project//llvm:Support", - "@llvm-project//mlir:FuncDialect", - "@llvm-project//mlir:IR", - "@llvm-project//mlir:Support", - "@llvm-project//mlir:TranslateLib", - "@local_tsl//tsl/platform:protobuf", - ], - alwayslink = 1, -) - cc_library( name = "mlir_roundtrip_flags", srcs = ["mlir_roundtrip_flags.cc"], diff --git a/tensorflow/compiler/mlir/tensorflow/translate/translate_tf_dialect_op.cc b/tensorflow/compiler/mlir/tensorflow/translate/translate_tf_dialect_op.cc deleted file mode 100644 index 856db032e501ae..00000000000000 --- a/tensorflow/compiler/mlir/tensorflow/translate/translate_tf_dialect_op.cc +++ /dev/null @@ -1,75 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "llvm/ADT/STLExtras.h" -#include "llvm/Support/ToolOutputFile.h" -#include "mlir/Dialect/Func/IR/FuncOps.h" // from @llvm-project -#include "mlir/IR/BuiltinOps.h" // from @llvm-project -#include "mlir/IR/Location.h" // from @llvm-project -#include "mlir/IR/MLIRContext.h" // from @llvm-project -#include "mlir/Tools/mlir-translate/Translation.h" // from @llvm-project -#include "tensorflow/compiler/mlir/tensorflow/dialect_registration.h" -#include "tensorflow/compiler/mlir/tensorflow/translate/export_tf_dialect_op.h" -#include "tsl/platform/protobuf.h" - -namespace mlir { -static mlir::Operation* ExtractOnlyOp(mlir::ModuleOp module) { - mlir::func::FuncOp fn = module.lookupSymbol("main"); - if (!fn) return nullptr; - - if (!llvm::hasSingleElement(fn)) return nullptr; - - // Here, modules with exactly two operations in the only basic block are - // supported. The last operation should be a terminator operation and the - // other operation is the operation of interest. 
- auto& block = fn.front(); - if (block.getOperations().size() != 2) return nullptr; - if (!block.back().hasTrait()) return nullptr; - - return &block.front(); -} - -static LogicalResult MlirToTfNodeDef(ModuleOp module, - llvm::raw_ostream& output) { - auto* context = module.getContext(); - - Operation* op = ExtractOnlyOp(module); - if (!op) { - emitError(UnknownLoc::get(context), - "modules with exactly one op other than terminator in a " - "'main' function's " - "only block are supported"); - return failure(); - } - - auto node_def_or = tensorflow::ConvertTFDialectOpToNodeDef( - op, "node_name", /*ignore_unregistered_attrs=*/false); - if (!node_def_or.ok()) { - op->emitError("failed to convert to TF NodeDef:") - << node_def_or.status().ToString(); - return failure(); - } - - output << tsl::LegacyUnredactedDebugString(*node_def_or.value()); - return success(); -} - -// Test only translation to convert a simple MLIR module with a single TF -// dialect op to NodeDef. -static TranslateFromMLIRRegistration translate_from_mlir_registration( - "test-only-mlir-to-tf-nodedef", "test-only-mlir-to-tf-nodedef", - MlirToTfNodeDef, mlir::RegisterAllTensorFlowDialects); - -} // namespace mlir From fe35120d7bb513623e46dbb2db41aa49bf00e36a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 May 2024 11:22:48 -0700 Subject: [PATCH 124/478] Update ops-related pbtxt files. PiperOrigin-RevId: 633649526 --- .../ops/compat/ops_history_v2/Abort.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Abs.pbtxt | 2 +- .../compat/ops_history_v2/AccumulateNV2.pbtxt | 2 +- .../AccumulatorApplyGradient.pbtxt | 2 +- .../AccumulatorNumAccumulated.pbtxt | 2 +- .../AccumulatorSetGlobalStep.pbtxt | 2 +- .../AccumulatorTakeGradient.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Acos.pbtxt | 2 +- .../ops/compat/ops_history_v2/Acosh.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Add.pbtxt | 2 +- .../AddManySparseToTensorsMap.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/AddN.pbtxt | 2 +- .../AddSparseToTensorsMap.pbtxt | 2 +- .../ops/compat/ops_history_v2/AddV2.pbtxt | 2 +- .../ops_history_v2/AdjustContrast.pbtxt | 2 +- .../ops_history_v2/AdjustContrastv2.pbtxt | 2 +- .../ops/compat/ops_history_v2/AdjustHue.pbtxt | 2 +- .../ops_history_v2/AdjustSaturation.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/All.pbtxt | 2 +- .../ops_history_v2/AllCandidateSampler.pbtxt | 2 +- .../ops/compat/ops_history_v2/AllToAll.pbtxt | 2 +- .../ops/compat/ops_history_v2/Angle.pbtxt | 2 +- .../ops_history_v2/AnonymousHashTable.pbtxt | 2 +- .../ops_history_v2/AnonymousIterator.pbtxt | 2 +- .../ops_history_v2/AnonymousIteratorV2.pbtxt | 2 +- .../ops_history_v2/AnonymousIteratorV3.pbtxt | 2 +- .../ops_history_v2/AnonymousMemoryCache.pbtxt | 2 +- .../AnonymousMultiDeviceIterator.pbtxt | 2 +- .../AnonymousMultiDeviceIteratorV3.pbtxt | 2 +- .../AnonymousMutableDenseHashTable.pbtxt | 2 +- .../AnonymousMutableHashTable.pbtxt | 2 +- .../AnonymousMutableHashTableOfTensors.pbtxt | 2 +- .../AnonymousRandomSeedGenerator.pbtxt | 2 +- .../AnonymousSeedGenerator.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Any.pbtxt | 2 +- .../compat/ops_history_v2/ApplyAdaMax.pbtxt | 2 +- .../compat/ops_history_v2/ApplyAdadelta.pbtxt | 2 +- .../compat/ops_history_v2/ApplyAdagrad.pbtxt | 2 +- .../ops_history_v2/ApplyAdagradDA.pbtxt | 2 +- .../ops_history_v2/ApplyAdagradV2.pbtxt | 2 +- .../ops/compat/ops_history_v2/ApplyAdam.pbtxt | 2 +- .../compat/ops_history_v2/ApplyAddSign.pbtxt | 2 +- .../ops_history_v2/ApplyCenteredRMSProp.pbtxt | 2 +- 
.../ops/compat/ops_history_v2/ApplyFtrl.pbtxt | 2 +- .../compat/ops_history_v2/ApplyFtrlV2.pbtxt | 2 +- .../ops_history_v2/ApplyGradientDescent.pbtxt | 2 +- .../compat/ops_history_v2/ApplyMomentum.pbtxt | 2 +- .../ops_history_v2/ApplyPowerSign.pbtxt | 2 +- .../ops_history_v2/ApplyProximalAdagrad.pbtxt | 2 +- .../ApplyProximalGradientDescent.pbtxt | 2 +- .../compat/ops_history_v2/ApplyRMSProp.pbtxt | 2 +- .../compat/ops_history_v2/ApproxTopK.pbtxt | 2 +- .../ops_history_v2/ApproximateEqual.pbtxt | 2 +- .../ops/compat/ops_history_v2/ArgMax.pbtxt | 2 +- .../ops/compat/ops_history_v2/ArgMin.pbtxt | 2 +- .../ops/compat/ops_history_v2/AsString.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Asin.pbtxt | 2 +- .../ops/compat/ops_history_v2/Asinh.pbtxt | 2 +- .../ops/compat/ops_history_v2/Assert.pbtxt | 2 +- .../AssertCardinalityDataset.pbtxt | 2 +- .../ops_history_v2/AssertNextDataset.pbtxt | 2 +- .../ops_history_v2/AssertPrevDataset.pbtxt | 2 +- .../ops/compat/ops_history_v2/Assign.pbtxt | 2 +- .../ops/compat/ops_history_v2/AssignAdd.pbtxt | 2 +- .../ops_history_v2/AssignAddVariableOp.pbtxt | 2 +- .../ops/compat/ops_history_v2/AssignSub.pbtxt | 2 +- .../ops_history_v2/AssignSubVariableOp.pbtxt | 2 +- .../ops_history_v2/AssignVariableOp.pbtxt | 2 +- .../AssignVariableXlaConcatND.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Atan.pbtxt | 2 +- .../ops/compat/ops_history_v2/Atan2.pbtxt | 2 +- .../ops/compat/ops_history_v2/Atanh.pbtxt | 2 +- .../ops_history_v2/AudioSpectrogram.pbtxt | 2 +- .../compat/ops_history_v2/AudioSummary.pbtxt | 2 +- .../ops_history_v2/AudioSummaryV2.pbtxt | 2 +- .../ops_history_v2/AutoShardDataset.pbtxt | 2 +- .../ops/compat/ops_history_v2/AvgPool.pbtxt | 2 +- .../ops/compat/ops_history_v2/AvgPool3D.pbtxt | 2 +- .../compat/ops_history_v2/AvgPool3DGrad.pbtxt | 2 +- .../compat/ops_history_v2/AvgPoolGrad.pbtxt | 2 +- .../BandedTriangularSolve.pbtxt | 2 +- .../ops/compat/ops_history_v2/Barrier.pbtxt | 2 +- .../compat/ops_history_v2/BarrierClose.pbtxt | 2 +- .../BarrierIncompleteSize.pbtxt | 2 +- .../ops_history_v2/BarrierInsertMany.pbtxt | 2 +- .../ops_history_v2/BarrierReadySize.pbtxt | 2 +- .../ops_history_v2/BarrierTakeMany.pbtxt | 2 +- .../ops/compat/ops_history_v2/Batch.pbtxt | 2 +- .../compat/ops_history_v2/BatchCholesky.pbtxt | 2 +- .../ops_history_v2/BatchCholeskyGrad.pbtxt | 2 +- .../compat/ops_history_v2/BatchDataset.pbtxt | 2 +- .../ops_history_v2/BatchDatasetV2.pbtxt | 2 +- .../ops/compat/ops_history_v2/BatchFFT.pbtxt | 2 +- .../compat/ops_history_v2/BatchFFT2D.pbtxt | 2 +- .../compat/ops_history_v2/BatchFFT3D.pbtxt | 2 +- .../compat/ops_history_v2/BatchFunction.pbtxt | 2 +- .../ops/compat/ops_history_v2/BatchIFFT.pbtxt | 2 +- .../compat/ops_history_v2/BatchIFFT2D.pbtxt | 2 +- .../compat/ops_history_v2/BatchIFFT3D.pbtxt | 2 +- .../compat/ops_history_v2/BatchMatMul.pbtxt | 2 +- .../compat/ops_history_v2/BatchMatMulV2.pbtxt | 2 +- .../compat/ops_history_v2/BatchMatMulV3.pbtxt | 2 +- .../ops_history_v2/BatchMatrixBandPart.pbtxt | 2 +- .../BatchMatrixDeterminant.pbtxt | 2 +- .../ops_history_v2/BatchMatrixDiag.pbtxt | 2 +- .../ops_history_v2/BatchMatrixDiagPart.pbtxt | 2 +- .../ops_history_v2/BatchMatrixInverse.pbtxt | 2 +- .../ops_history_v2/BatchMatrixSetDiag.pbtxt | 2 +- .../ops_history_v2/BatchMatrixSolve.pbtxt | 2 +- .../ops_history_v2/BatchMatrixSolveLs.pbtxt | 2 +- .../BatchMatrixTriangularSolve.pbtxt | 2 +- .../BatchNormWithGlobalNormalization.pbtxt | 2 +- ...BatchNormWithGlobalNormalizationGrad.pbtxt | 2 +- .../ops_history_v2/BatchSelfAdjointEig.pbtxt | 2 +- 
.../BatchSelfAdjointEigV2.pbtxt | 2 +- .../ops/compat/ops_history_v2/BatchSvd.pbtxt | 2 +- .../compat/ops_history_v2/BatchToSpace.pbtxt | 2 +- .../ops_history_v2/BatchToSpaceND.pbtxt | 2 +- .../ops/compat/ops_history_v2/BesselI0.pbtxt | 2 +- .../ops/compat/ops_history_v2/BesselI0e.pbtxt | 2 +- .../ops/compat/ops_history_v2/BesselI1.pbtxt | 2 +- .../ops/compat/ops_history_v2/BesselI1e.pbtxt | 2 +- .../ops/compat/ops_history_v2/BesselJ0.pbtxt | 2 +- .../ops/compat/ops_history_v2/BesselJ1.pbtxt | 2 +- .../ops/compat/ops_history_v2/BesselK0.pbtxt | 2 +- .../ops/compat/ops_history_v2/BesselK0e.pbtxt | 2 +- .../ops/compat/ops_history_v2/BesselK1.pbtxt | 2 +- .../ops/compat/ops_history_v2/BesselK1e.pbtxt | 2 +- .../ops/compat/ops_history_v2/BesselY0.pbtxt | 2 +- .../ops/compat/ops_history_v2/BesselY1.pbtxt | 2 +- .../ops/compat/ops_history_v2/Betainc.pbtxt | 2 +- .../ops/compat/ops_history_v2/BiasAdd.pbtxt | 2 +- .../compat/ops_history_v2/BiasAddGrad.pbtxt | 2 +- .../ops/compat/ops_history_v2/BiasAddV1.pbtxt | 2 +- .../ops/compat/ops_history_v2/Bincount.pbtxt | 2 +- .../ops/compat/ops_history_v2/Bitcast.pbtxt | 2 +- .../compat/ops_history_v2/BitwiseAnd.pbtxt | 2 +- .../ops/compat/ops_history_v2/BitwiseOr.pbtxt | 2 +- .../compat/ops_history_v2/BitwiseXor.pbtxt | 2 +- .../ops/compat/ops_history_v2/BlockLSTM.pbtxt | 2 +- .../compat/ops_history_v2/BlockLSTMGrad.pbtxt | 2 +- .../ops_history_v2/BlockLSTMGradV2.pbtxt | 2 +- .../compat/ops_history_v2/BlockLSTMV2.pbtxt | 2 +- .../BoostedTreesAggregateStats.pbtxt | 2 +- .../BoostedTreesBucketize.pbtxt | 2 +- ...oostedTreesCalculateBestFeatureSplit.pbtxt | 2 +- ...stedTreesCalculateBestFeatureSplitV2.pbtxt | 2 +- ...tedTreesCalculateBestGainsPerFeature.pbtxt | 2 +- .../BoostedTreesCenterBias.pbtxt | 2 +- .../BoostedTreesCreateEnsemble.pbtxt | 2 +- ...tedTreesCreateQuantileStreamResource.pbtxt | 2 +- .../BoostedTreesDeserializeEnsemble.pbtxt | 2 +- ...BoostedTreesEnsembleResourceHandleOp.pbtxt | 2 +- .../BoostedTreesExampleDebugOutputs.pbtxt | 2 +- .../BoostedTreesFlushQuantileSummaries.pbtxt | 2 +- .../BoostedTreesGetEnsembleStates.pbtxt | 2 +- .../BoostedTreesMakeQuantileSummaries.pbtxt | 2 +- .../BoostedTreesMakeStatsSummary.pbtxt | 2 +- .../ops_history_v2/BoostedTreesPredict.pbtxt | 2 +- ...esQuantileStreamResourceAddSummaries.pbtxt | 2 +- ...eesQuantileStreamResourceDeserialize.pbtxt | 2 +- ...stedTreesQuantileStreamResourceFlush.pbtxt | 2 +- ...ileStreamResourceGetBucketBoundaries.pbtxt | 2 +- ...dTreesQuantileStreamResourceHandleOp.pbtxt | 2 +- .../BoostedTreesSerializeEnsemble.pbtxt | 2 +- .../BoostedTreesSparseAggregateStats.pbtxt | 2 +- ...TreesSparseCalculateBestFeatureSplit.pbtxt | 2 +- .../BoostedTreesTrainingPredict.pbtxt | 2 +- .../BoostedTreesUpdateEnsemble.pbtxt | 2 +- .../BoostedTreesUpdateEnsembleV2.pbtxt | 2 +- .../compat/ops_history_v2/BroadcastArgs.pbtxt | 2 +- .../BroadcastGradientArgs.pbtxt | 2 +- .../compat/ops_history_v2/BroadcastTo.pbtxt | 2 +- .../ops/compat/ops_history_v2/Bucketize.pbtxt | 2 +- .../BytesProducedStatsDataset.pbtxt | 2 +- .../CSRSparseMatrixComponents.pbtxt | 2 +- .../CSRSparseMatrixToDense.pbtxt | 2 +- .../CSRSparseMatrixToSparseTensor.pbtxt | 2 +- .../compat/ops_history_v2/CSVDataset.pbtxt | 2 +- .../compat/ops_history_v2/CSVDatasetV2.pbtxt | 2 +- .../ops_history_v2/CTCBeamSearchDecoder.pbtxt | 2 +- .../ops_history_v2/CTCGreedyDecoder.pbtxt | 2 +- .../ops/compat/ops_history_v2/CTCLoss.pbtxt | 2 +- .../ops/compat/ops_history_v2/CTCLossV2.pbtxt | 2 +- .../compat/ops_history_v2/CacheDataset.pbtxt | 2 +- 
.../ops_history_v2/CacheDatasetV2.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Case.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Cast.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Ceil.pbtxt | 2 +- .../compat/ops_history_v2/CheckNumerics.pbtxt | 2 +- .../ops_history_v2/CheckNumericsV2.pbtxt | 2 +- .../ops/compat/ops_history_v2/Cholesky.pbtxt | 2 +- .../compat/ops_history_v2/CholeskyGrad.pbtxt | 2 +- .../ChooseFastestBranchDataset.pbtxt | 2 +- .../ops_history_v2/ChooseFastestDataset.pbtxt | 2 +- .../compat/ops_history_v2/ClipByValue.pbtxt | 2 +- .../ops_history_v2/CloseSummaryWriter.pbtxt | 2 +- .../CollateTPUEmbeddingMemory.pbtxt | 2 +- .../ops_history_v2/CollectiveAllToAllV2.pbtxt | 2 +- .../ops_history_v2/CollectiveAllToAllV3.pbtxt | 2 +- .../CollectiveAssignGroupV2.pbtxt | 2 +- .../ops_history_v2/CollectiveBcastRecv.pbtxt | 2 +- .../CollectiveBcastRecvV2.pbtxt | 2 +- .../ops_history_v2/CollectiveBcastSend.pbtxt | 2 +- .../CollectiveBcastSendV2.pbtxt | 2 +- .../ops_history_v2/CollectiveGather.pbtxt | 2 +- .../ops_history_v2/CollectiveGatherV2.pbtxt | 2 +- .../CollectiveInitializeCommunicator.pbtxt | 2 +- .../ops_history_v2/CollectivePermute.pbtxt | 2 +- .../ops_history_v2/CollectiveReduce.pbtxt | 2 +- .../CollectiveReduceScatterV2.pbtxt | 2 +- .../ops_history_v2/CollectiveReduceV2.pbtxt | 2 +- .../ops_history_v2/CollectiveReduceV3.pbtxt | 2 +- .../CombinedNonMaxSuppression.pbtxt | 2 +- .../ops/compat/ops_history_v2/Complex.pbtxt | 2 +- .../compat/ops_history_v2/ComplexAbs.pbtxt | 2 +- ...CompositeTensorVariantFromComponents.pbtxt | 2 +- .../CompositeTensorVariantToComponents.pbtxt | 2 +- .../ops_history_v2/CompressElement.pbtxt | 2 +- .../ComputeAccidentalHits.pbtxt | 2 +- .../ops_history_v2/ComputeBatchSize.pbtxt | 2 +- .../ops_history_v2/ComputeDedupDataSize.pbtxt | 2 +- .../ComputeDedupDataSizeV2.pbtxt | 2 +- .../ComputeDedupDataTupleMask.pbtxt | 2 +- .../ComputeDedupDataTupleMaskV2.pbtxt | 2 +- .../ops/compat/ops_history_v2/Concat.pbtxt | 2 +- .../compat/ops_history_v2/ConcatOffset.pbtxt | 2 +- .../ops/compat/ops_history_v2/ConcatV2.pbtxt | 2 +- .../ops_history_v2/ConcatenateDataset.pbtxt | 2 +- .../ConditionalAccumulator.pbtxt | 2 +- .../ConfigureAndInitializeGlobalTPU.pbtxt | 2 +- .../ConfigureDistributedTPU.pbtxt | 2 +- .../ConfigureTPUEmbedding.pbtxt | 2 +- .../ConfigureTPUEmbeddingHost.pbtxt | 2 +- .../ConfigureTPUEmbeddingMemory.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Conj.pbtxt | 2 +- .../ops_history_v2/ConjugateTranspose.pbtxt | 2 +- .../ConnectTPUEmbeddingHosts.pbtxt | 2 +- .../ops/compat/ops_history_v2/Const.pbtxt | 2 +- .../ops_history_v2/ConsumeMutexLock.pbtxt | 2 +- .../ops_history_v2/ControlTrigger.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Conv.pbtxt | 2 +- .../ops/compat/ops_history_v2/Conv2D.pbtxt | 2 +- .../ops_history_v2/Conv2DBackpropFilter.pbtxt | 2 +- .../Conv2DBackpropFilterV2.pbtxt | 2 +- .../ops_history_v2/Conv2DBackpropInput.pbtxt | 2 +- .../Conv2DBackpropInputV2.pbtxt | 2 +- .../ops/compat/ops_history_v2/Conv3D.pbtxt | 2 +- .../ops_history_v2/Conv3DBackpropFilter.pbtxt | 2 +- .../Conv3DBackpropFilterV2.pbtxt | 2 +- .../ops_history_v2/Conv3DBackpropInput.pbtxt | 2 +- .../Conv3DBackpropInputV2.pbtxt | 2 +- .../ops_history_v2/ConvertToCooTensor.pbtxt | 2 +- .../ConvertToListOfSparseCoreCooTensors.pbtxt | 2 +- ...nvertToSparseCoreCsrWrappedCooTensor.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Copy.pbtxt | 2 +- .../ops/compat/ops_history_v2/CopyHost.pbtxt | 2 +- .../compat/ops_history_v2/CopyToMesh.pbtxt | 2 +- 
.../ops_history_v2/CopyToMeshGrad.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Cos.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Cosh.pbtxt | 2 +- .../ops/compat/ops_history_v2/CountUpTo.pbtxt | 2 +- .../CreateSummaryDbWriter.pbtxt | 2 +- .../CreateSummaryFileWriter.pbtxt | 2 +- .../compat/ops_history_v2/CropAndResize.pbtxt | 2 +- .../CropAndResizeGradBoxes.pbtxt | 2 +- .../CropAndResizeGradImage.pbtxt | 2 +- .../ops/compat/ops_history_v2/Cross.pbtxt | 2 +- .../ops_history_v2/CrossReplicaSum.pbtxt | 2 +- .../ops/compat/ops_history_v2/CudnnRNN.pbtxt | 2 +- .../ops_history_v2/CudnnRNNBackprop.pbtxt | 2 +- .../ops_history_v2/CudnnRNNBackpropV2.pbtxt | 2 +- .../ops_history_v2/CudnnRNNBackpropV3.pbtxt | 2 +- .../CudnnRNNCanonicalToParams.pbtxt | 2 +- .../CudnnRNNCanonicalToParamsV2.pbtxt | 2 +- .../ops_history_v2/CudnnRNNParamsSize.pbtxt | 2 +- .../CudnnRNNParamsToCanonical.pbtxt | 2 +- .../CudnnRNNParamsToCanonicalV2.pbtxt | 2 +- .../compat/ops_history_v2/CudnnRNNV2.pbtxt | 2 +- .../compat/ops_history_v2/CudnnRNNV3.pbtxt | 2 +- .../ops/compat/ops_history_v2/Cumprod.pbtxt | 2 +- .../ops/compat/ops_history_v2/Cumsum.pbtxt | 2 +- .../ops_history_v2/CumulativeLogsumexp.pbtxt | 2 +- .../ops_history_v2/DTensorRestoreV2.pbtxt | 2 +- .../DTensorSetGlobalTPUArray.pbtxt | 2 +- .../ops_history_v2/DataFormatDimMap.pbtxt | 2 +- .../ops_history_v2/DataFormatVecPermute.pbtxt | 2 +- .../ops_history_v2/DataServiceDataset.pbtxt | 2 +- .../ops_history_v2/DataServiceDatasetV2.pbtxt | 2 +- .../ops_history_v2/DataServiceDatasetV3.pbtxt | 2 +- .../ops_history_v2/DataServiceDatasetV4.pbtxt | 2 +- .../ops_history_v2/DatasetCardinality.pbtxt | 2 +- .../ops_history_v2/DatasetFingerprint.pbtxt | 2 +- .../ops_history_v2/DatasetFromGraph.pbtxt | 2 +- .../ops_history_v2/DatasetToGraph.pbtxt | 2 +- .../ops_history_v2/DatasetToGraphV2.pbtxt | 2 +- .../DatasetToSingleElement.pbtxt | 2 +- .../ops_history_v2/DatasetToTFRecord.pbtxt | 2 +- .../ops/compat/ops_history_v2/Dawsn.pbtxt | 2 +- .../DebugGradientIdentity.pbtxt | 2 +- .../DebugGradientRefIdentity.pbtxt | 2 +- .../compat/ops_history_v2/DebugIdentity.pbtxt | 2 +- .../ops_history_v2/DebugIdentityV2.pbtxt | 2 +- .../ops_history_v2/DebugIdentityV3.pbtxt | 2 +- .../compat/ops_history_v2/DebugNanCount.pbtxt | 2 +- .../ops_history_v2/DebugNumericSummary.pbtxt | 2 +- .../DebugNumericSummaryV2.pbtxt | 2 +- .../ops_history_v2/DecodeAndCropJpeg.pbtxt | 2 +- .../compat/ops_history_v2/DecodeBase64.pbtxt | 2 +- .../ops/compat/ops_history_v2/DecodeBmp.pbtxt | 2 +- .../ops/compat/ops_history_v2/DecodeCSV.pbtxt | 2 +- .../ops_history_v2/DecodeCompressed.pbtxt | 2 +- .../ops/compat/ops_history_v2/DecodeGif.pbtxt | 2 +- .../compat/ops_history_v2/DecodeImage.pbtxt | 2 +- .../ops_history_v2/DecodeJSONExample.pbtxt | 2 +- .../compat/ops_history_v2/DecodeJpeg.pbtxt | 2 +- .../ops_history_v2/DecodePaddedRaw.pbtxt | 2 +- .../ops/compat/ops_history_v2/DecodePng.pbtxt | 2 +- .../compat/ops_history_v2/DecodeProtoV2.pbtxt | 2 +- .../ops/compat/ops_history_v2/DecodeRaw.pbtxt | 2 +- .../ops/compat/ops_history_v2/DecodeWav.pbtxt | 2 +- .../ops/compat/ops_history_v2/DeepCopy.pbtxt | 2 +- .../ops_history_v2/DeleteIterator.pbtxt | 2 +- .../ops_history_v2/DeleteMemoryCache.pbtxt | 2 +- .../DeleteMultiDeviceIterator.pbtxt | 2 +- .../DeleteRandomSeedGenerator.pbtxt | 2 +- .../ops_history_v2/DeleteSeedGenerator.pbtxt | 2 +- .../ops_history_v2/DeleteSessionTensor.pbtxt | 2 +- .../compat/ops_history_v2/DenseBincount.pbtxt | 2 +- .../DenseCountSparseOutput.pbtxt | 2 +- .../DenseToCSRSparseMatrix.pbtxt | 2 +- 
.../DenseToDenseSetOperation.pbtxt | 2 +- .../DenseToSparseBatchDataset.pbtxt | 2 +- .../DenseToSparseSetOperation.pbtxt | 2 +- .../compat/ops_history_v2/DepthToSpace.pbtxt | 2 +- .../DepthwiseConv2dNative.pbtxt | 2 +- .../DepthwiseConv2dNativeBackpropFilter.pbtxt | 2 +- .../DepthwiseConv2dNativeBackpropInput.pbtxt | 2 +- .../compat/ops_history_v2/Dequantize.pbtxt | 2 +- .../ops_history_v2/DeserializeIterator.pbtxt | 2 +- .../DeserializeManySparse.pbtxt | 2 +- .../ops_history_v2/DeserializeSparse.pbtxt | 2 +- .../ops_history_v2/DestroyResourceOp.pbtxt | 2 +- .../DestroyTemporaryVariable.pbtxt | 2 +- .../compat/ops_history_v2/DeviceIndex.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Diag.pbtxt | 2 +- .../ops/compat/ops_history_v2/DiagPart.pbtxt | 2 +- .../ops/compat/ops_history_v2/Digamma.pbtxt | 2 +- .../compat/ops_history_v2/Dilation2D.pbtxt | 2 +- .../Dilation2DBackpropFilter.pbtxt | 2 +- .../Dilation2DBackpropInput.pbtxt | 2 +- .../DirectedInterleaveDataset.pbtxt | 2 +- .../ops_history_v2/DisableCopyOnRead.pbtxt | 2 +- .../ops_history_v2/DistributedSave.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Div.pbtxt | 2 +- .../ops/compat/ops_history_v2/DivNoNan.pbtxt | 2 +- .../ops_history_v2/DrawBoundingBoxes.pbtxt | 2 +- .../ops_history_v2/DrawBoundingBoxesV2.pbtxt | 2 +- .../DummyIterationCounter.pbtxt | 2 +- .../ops_history_v2/DummyMemoryCache.pbtxt | 2 +- .../ops_history_v2/DummySeedGenerator.pbtxt | 2 +- ...ueueTPUEmbeddingArbitraryTensorBatch.pbtxt | 2 +- ...EnqueueTPUEmbeddingRaggedTensorBatch.pbtxt | 2 +- .../ops_history_v2/DynamicPartition.pbtxt | 2 +- .../compat/ops_history_v2/DynamicStitch.pbtxt | 2 +- .../compat/ops_history_v2/EagerPyFunc.pbtxt | 2 +- .../compat/ops_history_v2/EditDistance.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Eig.pbtxt | 2 +- .../ops/compat/ops_history_v2/Einsum.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Elu.pbtxt | 2 +- .../ops/compat/ops_history_v2/EluGrad.pbtxt | 2 +- .../ops/compat/ops_history_v2/Empty.pbtxt | 2 +- .../ops_history_v2/EmptyTensorList.pbtxt | 2 +- .../ops_history_v2/EmptyTensorMap.pbtxt | 2 +- .../compat/ops_history_v2/EncodeBase64.pbtxt | 2 +- .../compat/ops_history_v2/EncodeJpeg.pbtxt | 2 +- .../EncodeJpegVariableQuality.pbtxt | 2 +- .../ops/compat/ops_history_v2/EncodePng.pbtxt | 2 +- .../compat/ops_history_v2/EncodeProto.pbtxt | 2 +- .../ops/compat/ops_history_v2/EncodeWav.pbtxt | 2 +- ...ueueTPUEmbeddingArbitraryTensorBatch.pbtxt | 2 +- .../EnqueueTPUEmbeddingBatch.pbtxt | 2 +- .../EnqueueTPUEmbeddingIntegerBatch.pbtxt | 2 +- ...EnqueueTPUEmbeddingRaggedTensorBatch.pbtxt | 2 +- .../EnqueueTPUEmbeddingSparseBatch.pbtxt | 2 +- ...EnqueueTPUEmbeddingSparseTensorBatch.pbtxt | 2 +- .../compat/ops_history_v2/EnsureShape.pbtxt | 2 +- .../ops/compat/ops_history_v2/Enter.pbtxt | 2 +- .../ops/compat/ops_history_v2/Equal.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Erf.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Erfc.pbtxt | 2 +- .../ops/compat/ops_history_v2/Erfinv.pbtxt | 2 +- .../compat/ops_history_v2/EuclideanNorm.pbtxt | 2 +- .../ExecuteTPUEmbeddingPartitioner.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Exit.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Exp.pbtxt | 2 +- .../compat/ops_history_v2/ExpandDims.pbtxt | 2 +- .../ExperimentalAssertNextDataset.pbtxt | 2 +- .../ExperimentalAutoShardDataset.pbtxt | 2 +- ...xperimentalBytesProducedStatsDataset.pbtxt | 2 +- .../ExperimentalCSVDataset.pbtxt | 2 +- .../ExperimentalChooseFastestDataset.pbtxt | 2 +- .../ExperimentalDatasetCardinality.pbtxt | 2 +- 
.../ExperimentalDatasetToTFRecord.pbtxt | 2 +- ...xperimentalDenseToSparseBatchDataset.pbtxt | 2 +- ...xperimentalDirectedInterleaveDataset.pbtxt | 2 +- .../ExperimentalGroupByReducerDataset.pbtxt | 2 +- .../ExperimentalGroupByWindowDataset.pbtxt | 2 +- .../ExperimentalIgnoreErrorsDataset.pbtxt | 2 +- .../ExperimentalIteratorGetDevice.pbtxt | 2 +- .../ExperimentalLMDBDataset.pbtxt | 2 +- .../ExperimentalLatencyStatsDataset.pbtxt | 2 +- .../ExperimentalMapAndBatchDataset.pbtxt | 2 +- .../ExperimentalMapDataset.pbtxt | 2 +- .../ExperimentalMatchingFilesDataset.pbtxt | 2 +- ...rimentalMaxIntraOpParallelismDataset.pbtxt | 2 +- .../ExperimentalNonSerializableDataset.pbtxt | 2 +- ...xperimentalParallelInterleaveDataset.pbtxt | 2 +- .../ExperimentalParseExampleDataset.pbtxt | 2 +- ...ExperimentalPrivateThreadPoolDataset.pbtxt | 2 +- .../ExperimentalRandomDataset.pbtxt | 2 +- .../ExperimentalRebatchDataset.pbtxt | 2 +- .../ExperimentalScanDataset.pbtxt | 2 +- ...xperimentalSetStatsAggregatorDataset.pbtxt | 2 +- .../ExperimentalSleepDataset.pbtxt | 2 +- .../ExperimentalSlidingWindowDataset.pbtxt | 2 +- .../ExperimentalSqlDataset.pbtxt | 2 +- .../ExperimentalStatsAggregatorHandle.pbtxt | 2 +- .../ExperimentalStatsAggregatorSummary.pbtxt | 2 +- .../ExperimentalTakeWhileDataset.pbtxt | 2 +- .../ExperimentalThreadPoolDataset.pbtxt | 2 +- .../ExperimentalThreadPoolHandle.pbtxt | 2 +- .../ExperimentalUnbatchDataset.pbtxt | 2 +- .../ExperimentalUniqueDataset.pbtxt | 2 +- .../ops/compat/ops_history_v2/Expint.pbtxt | 2 +- .../ops/compat/ops_history_v2/Expm1.pbtxt | 2 +- .../ops_history_v2/ExtractGlimpse.pbtxt | 2 +- .../ops_history_v2/ExtractGlimpseV2.pbtxt | 2 +- .../ops_history_v2/ExtractImagePatches.pbtxt | 2 +- .../ops_history_v2/ExtractJpegShape.pbtxt | 2 +- .../ops_history_v2/ExtractVolumePatches.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/FFT.pbtxt | 2 +- .../ops/compat/ops_history_v2/FFT2D.pbtxt | 2 +- .../ops/compat/ops_history_v2/FFT3D.pbtxt | 2 +- .../ops/compat/ops_history_v2/FFTND.pbtxt | 2 +- .../ops/compat/ops_history_v2/FIFOQueue.pbtxt | 2 +- .../compat/ops_history_v2/FIFOQueueV2.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Fact.pbtxt | 2 +- .../ops/compat/ops_history_v2/FakeParam.pbtxt | 2 +- .../FakeQuantWithMinMaxArgs.pbtxt | 2 +- .../FakeQuantWithMinMaxArgsGradient.pbtxt | 2 +- .../FakeQuantWithMinMaxVars.pbtxt | 2 +- .../FakeQuantWithMinMaxVarsGradient.pbtxt | 2 +- .../FakeQuantWithMinMaxVarsPerChannel.pbtxt | 2 +- ...uantWithMinMaxVarsPerChannelGradient.pbtxt | 2 +- .../ops/compat/ops_history_v2/FakeQueue.pbtxt | 2 +- .../FileSystemSetConfiguration.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Fill.pbtxt | 2 +- .../FilterByLastComponentDataset.pbtxt | 2 +- .../compat/ops_history_v2/FilterDataset.pbtxt | 2 +- .../ops_history_v2/FinalizeDataset.pbtxt | 2 +- .../ops_history_v2/FinalizeTPUEmbedding.pbtxt | 2 +- .../FinalizeTPUEmbeddingV2.pbtxt | 2 +- .../compat/ops_history_v2/Fingerprint.pbtxt | 2 +- .../FixedLengthRecordDataset.pbtxt | 2 +- .../FixedLengthRecordDatasetV2.pbtxt | 2 +- .../FixedLengthRecordReader.pbtxt | 2 +- .../FixedLengthRecordReaderV2.pbtxt | 2 +- .../FixedUnigramCandidateSampler.pbtxt | 2 +- .../ops_history_v2/FlatMapDataset.pbtxt | 2 +- .../ops/compat/ops_history_v2/Floor.pbtxt | 2 +- .../ops/compat/ops_history_v2/FloorDiv.pbtxt | 2 +- .../ops/compat/ops_history_v2/FloorMod.pbtxt | 2 +- .../ops_history_v2/FlushSummaryWriter.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/For.pbtxt | 2 +- .../ops_history_v2/FractionalAvgPool.pbtxt | 2 +- 
.../FractionalAvgPoolGrad.pbtxt | 2 +- .../ops_history_v2/FractionalMaxPool.pbtxt | 2 +- .../FractionalMaxPoolGrad.pbtxt | 2 +- .../compat/ops_history_v2/FresnelCos.pbtxt | 2 +- .../compat/ops_history_v2/FresnelSin.pbtxt | 2 +- .../ops_history_v2/FusedBatchNorm.pbtxt | 2 +- .../ops_history_v2/FusedBatchNormGrad.pbtxt | 2 +- .../ops_history_v2/FusedBatchNormGradV2.pbtxt | 2 +- .../ops_history_v2/FusedBatchNormGradV3.pbtxt | 2 +- .../ops_history_v2/FusedBatchNormV2.pbtxt | 2 +- .../ops_history_v2/FusedBatchNormV3.pbtxt | 2 +- .../ops_history_v2/FusedPadConv2D.pbtxt | 2 +- .../FusedResizeAndPadConv2D.pbtxt | 2 +- .../compat/ops_history_v2/GRUBlockCell.pbtxt | 2 +- .../ops_history_v2/GRUBlockCellGrad.pbtxt | 2 +- .../ops/compat/ops_history_v2/Gather.pbtxt | 2 +- .../ops/compat/ops_history_v2/GatherNd.pbtxt | 2 +- .../ops/compat/ops_history_v2/GatherV2.pbtxt | 2 +- .../GenerateBoundingBoxProposals.pbtxt | 2 +- .../GenerateVocabRemapping.pbtxt | 2 +- .../ops_history_v2/GeneratorDataset.pbtxt | 2 +- .../ops_history_v2/GetElementAtIndex.pbtxt | 2 +- ...etMinibatchSplitsWithPhysicalReplica.pbtxt | 2 +- ...tMinibatchesInCsrWithPhysicalReplica.pbtxt | 2 +- .../compat/ops_history_v2/GetOptions.pbtxt | 2 +- .../ops_history_v2/GetSessionHandle.pbtxt | 2 +- .../ops_history_v2/GetSessionHandleV2.pbtxt | 2 +- .../ops_history_v2/GetSessionTensor.pbtxt | 2 +- ...tStatsFromListOfSparseCoreCooTensors.pbtxt | 2 +- .../compat/ops_history_v2/GetTpuTaskId.pbtxt | 1 - .../compat/ops_history_v2/GlobalIterId.pbtxt | 2 +- .../ops_history_v2/GlobalShuffleDataset.pbtxt | 2 +- .../ops/compat/ops_history_v2/Greater.pbtxt | 2 +- .../compat/ops_history_v2/GreaterEqual.pbtxt | 2 +- .../GroupByReducerDataset.pbtxt | 2 +- .../ops_history_v2/GroupByWindowDataset.pbtxt | 2 +- .../ops_history_v2/GuaranteeConst.pbtxt | 2 +- .../ops/compat/ops_history_v2/HSVToRGB.pbtxt | 2 +- .../ops/compat/ops_history_v2/HashTable.pbtxt | 2 +- .../compat/ops_history_v2/HashTableV2.pbtxt | 2 +- .../ops_history_v2/HistogramFixedWidth.pbtxt | 2 +- .../ops_history_v2/HistogramSummary.pbtxt | 2 +- .../ops/compat/ops_history_v2/HostConst.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/IFFT.pbtxt | 2 +- .../ops/compat/ops_history_v2/IFFT2D.pbtxt | 2 +- .../ops/compat/ops_history_v2/IFFT3D.pbtxt | 2 +- .../ops/compat/ops_history_v2/IFFTND.pbtxt | 2 +- .../ops/compat/ops_history_v2/IRFFT.pbtxt | 2 +- .../ops/compat/ops_history_v2/IRFFT2D.pbtxt | 2 +- .../ops/compat/ops_history_v2/IRFFT3D.pbtxt | 2 +- .../ops/compat/ops_history_v2/IRFFTND.pbtxt | 2 +- .../ops/compat/ops_history_v2/Identity.pbtxt | 2 +- .../ops/compat/ops_history_v2/IdentityN.pbtxt | 2 +- .../ops_history_v2/IdentityReader.pbtxt | 2 +- .../ops_history_v2/IdentityReaderV2.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/If.pbtxt | 2 +- .../ops/compat/ops_history_v2/Igamma.pbtxt | 2 +- .../compat/ops_history_v2/IgammaGradA.pbtxt | 2 +- .../ops/compat/ops_history_v2/Igammac.pbtxt | 2 +- .../ops_history_v2/IgnoreErrorsDataset.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Imag.pbtxt | 2 +- .../ImageProjectiveTransformV2.pbtxt | 2 +- .../ImageProjectiveTransformV3.pbtxt | 2 +- .../compat/ops_history_v2/ImageSummary.pbtxt | 2 +- .../ops_history_v2/ImmutableConst.pbtxt | 2 +- .../compat/ops_history_v2/ImportEvent.pbtxt | 2 +- .../ops/compat/ops_history_v2/InTopK.pbtxt | 2 +- .../ops/compat/ops_history_v2/InTopKV2.pbtxt | 2 +- .../ops_history_v2/IndexFlatMapDataset.pbtxt | 2 +- .../compat/ops_history_v2/InfeedDequeue.pbtxt | 2 +- .../ops_history_v2/InfeedDequeueTuple.pbtxt | 2 +- 
.../compat/ops_history_v2/InfeedEnqueue.pbtxt | 2 +- .../InfeedEnqueuePrelinearizedBuffer.pbtxt | 2 +- .../ops_history_v2/InfeedEnqueueTuple.pbtxt | 2 +- .../ops_history_v2/InitializeTable.pbtxt | 2 +- .../InitializeTableFromDataset.pbtxt | 2 +- .../InitializeTableFromTextFile.pbtxt | 2 +- .../InitializeTableFromTextFileV2.pbtxt | 2 +- .../ops_history_v2/InitializeTableV2.pbtxt | 2 +- .../compat/ops_history_v2/InplaceAdd.pbtxt | 2 +- .../compat/ops_history_v2/InplaceSub.pbtxt | 2 +- .../compat/ops_history_v2/InplaceUpdate.pbtxt | 2 +- .../ops_history_v2/InterleaveDataset.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Inv.pbtxt | 2 +- .../ops/compat/ops_history_v2/InvGrad.pbtxt | 2 +- .../ops/compat/ops_history_v2/Invert.pbtxt | 2 +- .../ops_history_v2/InvertPermutation.pbtxt | 2 +- .../IsBoostedTreesEnsembleInitialized.pbtxt | 2 +- ...eesQuantileStreamResourceInitialized.pbtxt | 2 +- .../ops/compat/ops_history_v2/IsFinite.pbtxt | 2 +- .../ops/compat/ops_history_v2/IsInf.pbtxt | 2 +- .../ops/compat/ops_history_v2/IsNan.pbtxt | 2 +- .../IsTPUEmbeddingInitialized.pbtxt | 2 +- .../IsVariableInitialized.pbtxt | 2 +- .../ops_history_v2/IsotonicRegression.pbtxt | 2 +- .../ops/compat/ops_history_v2/Iterator.pbtxt | 2 +- .../IteratorFromStringHandle.pbtxt | 2 +- .../IteratorFromStringHandleV2.pbtxt | 2 +- .../ops_history_v2/IteratorGetDevice.pbtxt | 2 +- .../IteratorGetModelProto.pbtxt | 2 +- .../ops_history_v2/IteratorGetNext.pbtxt | 2 +- .../IteratorGetNextAsOptional.pbtxt | 2 +- .../ops_history_v2/IteratorGetNextSync.pbtxt | 2 +- .../IteratorToStringHandle.pbtxt | 2 +- .../compat/ops_history_v2/IteratorV2.pbtxt | 2 +- .../KMC2ChainInitialization.pbtxt | 2 +- .../KmeansPlusPlusInitialization.pbtxt | 2 +- .../ops_history_v2/KthOrderStatistic.pbtxt | 2 +- .../ops/compat/ops_history_v2/L2Loss.pbtxt | 2 +- .../compat/ops_history_v2/LMDBDataset.pbtxt | 2 +- .../compat/ops_history_v2/LMDBReader.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/LRN.pbtxt | 2 +- .../ops/compat/ops_history_v2/LRNGrad.pbtxt | 2 +- .../compat/ops_history_v2/LSTMBlockCell.pbtxt | 2 +- .../ops_history_v2/LSTMBlockCellGrad.pbtxt | 2 +- .../ops_history_v2/LatencyStatsDataset.pbtxt | 2 +- .../ops/compat/ops_history_v2/LeakyRelu.pbtxt | 2 +- .../compat/ops_history_v2/LeakyReluGrad.pbtxt | 2 +- .../LearnedUnigramCandidateSampler.pbtxt | 2 +- .../ops/compat/ops_history_v2/LeftShift.pbtxt | 2 +- .../LegacyParallelInterleaveDatasetV2.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Less.pbtxt | 2 +- .../ops/compat/ops_history_v2/LessEqual.pbtxt | 2 +- .../ops/compat/ops_history_v2/Lgamma.pbtxt | 2 +- .../ops/compat/ops_history_v2/LinSpace.pbtxt | 2 +- .../compat/ops_history_v2/ListDataset.pbtxt | 2 +- .../ops/compat/ops_history_v2/ListDiff.pbtxt | 2 +- .../ListSnapshotChunksDataset.pbtxt | 2 +- .../LoadAllTPUEmbeddingParameters.pbtxt | 2 +- .../ops_history_v2/LoadAndRemapMatrix.pbtxt | 2 +- .../compat/ops_history_v2/LoadDataset.pbtxt | 2 +- .../LoadTPUEmbeddingADAMParameters.pbtxt | 2 +- .../LoadTPUEmbeddingAdadeltaParameters.pbtxt | 2 +- ...PUEmbeddingAdagradMomentumParameters.pbtxt | 2 +- .../LoadTPUEmbeddingAdagradParameters.pbtxt | 2 +- ...PUEmbeddingCenteredRMSPropParameters.pbtxt | 2 +- .../LoadTPUEmbeddingFTRLParameters.pbtxt | 2 +- ...mbeddingFrequencyEstimatorParameters.pbtxt | 2 +- ...PUEmbeddingMDLAdagradLightParameters.pbtxt | 2 +- .../LoadTPUEmbeddingMomentumParameters.pbtxt | 2 +- ...PUEmbeddingProximalAdagradParameters.pbtxt | 2 +- ...adTPUEmbeddingProximalYogiParameters.pbtxt | 2 +- 
.../LoadTPUEmbeddingRMSPropParameters.pbtxt | 2 +- ...gStochasticGradientDescentParameters.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Log.pbtxt | 2 +- .../ops/compat/ops_history_v2/Log1p.pbtxt | 2 +- .../ops_history_v2/LogMatrixDeterminant.pbtxt | 2 +- .../compat/ops_history_v2/LogSoftmax.pbtxt | 2 +- .../LogUniformCandidateSampler.pbtxt | 2 +- .../compat/ops_history_v2/LogicalAnd.pbtxt | 2 +- .../compat/ops_history_v2/LogicalNot.pbtxt | 2 +- .../ops/compat/ops_history_v2/LogicalOr.pbtxt | 2 +- .../ops_history_v2/LookupTableExport.pbtxt | 2 +- .../ops_history_v2/LookupTableExportV2.pbtxt | 2 +- .../ops_history_v2/LookupTableFind.pbtxt | 2 +- .../ops_history_v2/LookupTableFindV2.pbtxt | 2 +- .../ops_history_v2/LookupTableImport.pbtxt | 2 +- .../ops_history_v2/LookupTableImportV2.pbtxt | 2 +- .../ops_history_v2/LookupTableInsert.pbtxt | 2 +- .../ops_history_v2/LookupTableInsertV2.pbtxt | 2 +- .../ops_history_v2/LookupTableRemoveV2.pbtxt | 2 +- .../ops_history_v2/LookupTableSize.pbtxt | 2 +- .../ops_history_v2/LookupTableSizeV2.pbtxt | 2 +- .../ops/compat/ops_history_v2/LoopCond.pbtxt | 2 +- .../compat/ops_history_v2/LowerBound.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Lu.pbtxt | 2 +- .../compat/ops_history_v2/MakeIterator.pbtxt | 2 +- .../compat/ops_history_v2/MakeUnique.pbtxt | 2 +- .../ops_history_v2/MapAndBatchDataset.pbtxt | 2 +- .../ops/compat/ops_history_v2/MapClear.pbtxt | 2 +- .../compat/ops_history_v2/MapDataset.pbtxt | 2 +- .../ops/compat/ops_history_v2/MapDefun.pbtxt | 2 +- .../ops_history_v2/MapIncompleteSize.pbtxt | 2 +- .../ops/compat/ops_history_v2/MapPeek.pbtxt | 2 +- .../ops/compat/ops_history_v2/MapSize.pbtxt | 2 +- .../ops/compat/ops_history_v2/MapStage.pbtxt | 2 +- .../compat/ops_history_v2/MapUnstage.pbtxt | 2 +- .../ops_history_v2/MapUnstageNoKey.pbtxt | 2 +- .../ops/compat/ops_history_v2/MatMul.pbtxt | 2 +- .../compat/ops_history_v2/MatchingFiles.pbtxt | 2 +- .../ops_history_v2/MatchingFilesDataset.pbtxt | 2 +- .../ops_history_v2/MatrixBandPart.pbtxt | 2 +- .../ops_history_v2/MatrixDeterminant.pbtxt | 2 +- .../compat/ops_history_v2/MatrixDiag.pbtxt | 2 +- .../ops_history_v2/MatrixDiagPart.pbtxt | 2 +- .../ops_history_v2/MatrixDiagPartV2.pbtxt | 2 +- .../ops_history_v2/MatrixDiagPartV3.pbtxt | 2 +- .../compat/ops_history_v2/MatrixDiagV2.pbtxt | 2 +- .../compat/ops_history_v2/MatrixDiagV3.pbtxt | 2 +- .../ops_history_v2/MatrixExponential.pbtxt | 2 +- .../compat/ops_history_v2/MatrixInverse.pbtxt | 2 +- .../ops_history_v2/MatrixLogarithm.pbtxt | 2 +- .../compat/ops_history_v2/MatrixSetDiag.pbtxt | 2 +- .../ops_history_v2/MatrixSetDiagV2.pbtxt | 2 +- .../ops_history_v2/MatrixSetDiagV3.pbtxt | 2 +- .../compat/ops_history_v2/MatrixSolve.pbtxt | 2 +- .../compat/ops_history_v2/MatrixSolveLs.pbtxt | 2 +- .../ops_history_v2/MatrixSquareRoot.pbtxt | 2 +- .../MatrixTriangularSolve.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Max.pbtxt | 2 +- .../MaxIntraOpParallelismDataset.pbtxt | 2 +- .../ops/compat/ops_history_v2/MaxPool.pbtxt | 2 +- .../ops/compat/ops_history_v2/MaxPool3D.pbtxt | 2 +- .../compat/ops_history_v2/MaxPool3DGrad.pbtxt | 2 +- .../ops_history_v2/MaxPool3DGradGrad.pbtxt | 2 +- .../compat/ops_history_v2/MaxPoolGrad.pbtxt | 2 +- .../ops_history_v2/MaxPoolGradGrad.pbtxt | 2 +- .../ops_history_v2/MaxPoolGradGradV2.pbtxt | 2 +- .../MaxPoolGradGradWithArgmax.pbtxt | 2 +- .../compat/ops_history_v2/MaxPoolGradV2.pbtxt | 2 +- .../MaxPoolGradWithArgmax.pbtxt | 2 +- .../ops/compat/ops_history_v2/MaxPoolV2.pbtxt | 2 +- .../ops_history_v2/MaxPoolWithArgmax.pbtxt | 2 
+- .../ops/compat/ops_history_v2/Maximum.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Mean.pbtxt | 2 +- .../ops/compat/ops_history_v2/Merge.pbtxt | 2 +- .../ops_history_v2/MergeDedupData.pbtxt | 2 +- .../compat/ops_history_v2/MergeSummary.pbtxt | 2 +- .../ops_history_v2/MergeV2Checkpoints.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Mfcc.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Min.pbtxt | 2 +- .../ops/compat/ops_history_v2/Minimum.pbtxt | 2 +- .../ops/compat/ops_history_v2/MirrorPad.pbtxt | 2 +- .../compat/ops_history_v2/MirrorPadGrad.pbtxt | 2 +- .../ops_history_v2/MlirPassthroughOp.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Mod.pbtxt | 2 +- .../compat/ops_history_v2/ModelDataset.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Mul.pbtxt | 2 +- .../ops/compat/ops_history_v2/MulNoNan.pbtxt | 2 +- .../ops_history_v2/MultiDeviceIterator.pbtxt | 2 +- .../MultiDeviceIteratorFromStringHandle.pbtxt | 2 +- .../MultiDeviceIteratorGetNextFromShard.pbtxt | 2 +- .../MultiDeviceIteratorInit.pbtxt | 2 +- .../MultiDeviceIteratorToStringHandle.pbtxt | 2 +- .../compat/ops_history_v2/Multinomial.pbtxt | 2 +- .../MutableDenseHashTable.pbtxt | 2 +- .../MutableDenseHashTableV2.pbtxt | 2 +- .../ops_history_v2/MutableHashTable.pbtxt | 2 +- .../MutableHashTableOfTensors.pbtxt | 2 +- .../MutableHashTableOfTensorsV2.pbtxt | 2 +- .../ops_history_v2/MutableHashTableV2.pbtxt | 2 +- .../ops/compat/ops_history_v2/MutexLock.pbtxt | 2 +- .../ops/compat/ops_history_v2/MutexV2.pbtxt | 2 +- .../compat/ops_history_v2/NcclAllReduce.pbtxt | 2 +- .../compat/ops_history_v2/NcclBroadcast.pbtxt | 2 +- .../compat/ops_history_v2/NcclReduce.pbtxt | 2 +- .../ops/compat/ops_history_v2/Ndtri.pbtxt | 2 +- .../ops_history_v2/NearestNeighbors.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Neg.pbtxt | 2 +- .../ops/compat/ops_history_v2/NegTrain.pbtxt | 2 +- .../ops/compat/ops_history_v2/NextAfter.pbtxt | 2 +- .../compat/ops_history_v2/NextIteration.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/NoOp.pbtxt | 2 +- .../ops_history_v2/NonDeterministicInts.pbtxt | 2 +- .../ops_history_v2/NonMaxSuppression.pbtxt | 2 +- .../ops_history_v2/NonMaxSuppressionV2.pbtxt | 2 +- .../ops_history_v2/NonMaxSuppressionV3.pbtxt | 2 +- .../ops_history_v2/NonMaxSuppressionV4.pbtxt | 2 +- .../ops_history_v2/NonMaxSuppressionV5.pbtxt | 2 +- .../NonMaxSuppressionWithOverlaps.pbtxt | 2 +- .../NonSerializableDataset.pbtxt | 2 +- .../ops/compat/ops_history_v2/NotEqual.pbtxt | 2 +- .../compat/ops_history_v2/NthElement.pbtxt | 2 +- .../ops/compat/ops_history_v2/OneHot.pbtxt | 2 +- .../ops_history_v2/OneShotIterator.pbtxt | 2 +- .../ops/compat/ops_history_v2/OnesLike.pbtxt | 2 +- .../ops_history_v2/OptimizeDataset.pbtxt | 2 +- .../ops_history_v2/OptimizeDatasetV2.pbtxt | 2 +- .../ops_history_v2/OptionalFromValue.pbtxt | 2 +- .../ops_history_v2/OptionalGetValue.pbtxt | 2 +- .../ops_history_v2/OptionalHasValue.pbtxt | 2 +- .../compat/ops_history_v2/OptionalNone.pbtxt | 2 +- .../ops_history_v2/OptionsDataset.pbtxt | 2 +- .../ops_history_v2/OrderedMapClear.pbtxt | 2 +- .../OrderedMapIncompleteSize.pbtxt | 2 +- .../ops_history_v2/OrderedMapPeek.pbtxt | 2 +- .../ops_history_v2/OrderedMapSize.pbtxt | 2 +- .../ops_history_v2/OrderedMapStage.pbtxt | 2 +- .../ops_history_v2/OrderedMapUnstage.pbtxt | 2 +- .../OrderedMapUnstageNoKey.pbtxt | 2 +- .../ops_history_v2/OutfeedDequeue.pbtxt | 2 +- .../ops_history_v2/OutfeedDequeueTuple.pbtxt | 2 +- .../OutfeedDequeueTupleV2.pbtxt | 2 +- .../ops_history_v2/OutfeedDequeueV2.pbtxt | 2 +- 
.../ops_history_v2/OutfeedEnqueue.pbtxt | 2 +- .../ops_history_v2/OutfeedEnqueueTuple.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Pack.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Pad.pbtxt | 2 +- .../ops/compat/ops_history_v2/PadV2.pbtxt | 2 +- .../ops_history_v2/PaddedBatchDataset.pbtxt | 2 +- .../ops_history_v2/PaddedBatchDatasetV2.pbtxt | 2 +- .../ops_history_v2/PaddingFIFOQueue.pbtxt | 2 +- .../ops_history_v2/PaddingFIFOQueueV2.pbtxt | 2 +- .../ops_history_v2/ParallelBatchDataset.pbtxt | 2 +- .../ops_history_v2/ParallelConcat.pbtxt | 2 +- .../ParallelDynamicStitch.pbtxt | 2 +- .../ParallelFilterDataset.pbtxt | 2 +- .../ParallelInterleaveDataset.pbtxt | 2 +- .../ParallelInterleaveDatasetV2.pbtxt | 2 +- .../ParallelInterleaveDatasetV3.pbtxt | 2 +- .../ParallelInterleaveDatasetV4.pbtxt | 2 +- .../ops_history_v2/ParallelMapDataset.pbtxt | 2 +- .../ops_history_v2/ParallelMapDatasetV2.pbtxt | 2 +- .../ParameterizedTruncatedNormal.pbtxt | 2 +- .../compat/ops_history_v2/ParseExample.pbtxt | 2 +- .../ops_history_v2/ParseExampleDataset.pbtxt | 2 +- .../ParseExampleDatasetV2.pbtxt | 2 +- .../ops_history_v2/ParseExampleV2.pbtxt | 2 +- .../ops_history_v2/ParseSequenceExample.pbtxt | 2 +- .../ParseSequenceExampleV2.pbtxt | 2 +- .../ops_history_v2/ParseSingleExample.pbtxt | 2 +- .../ParseSingleSequenceExample.pbtxt | 2 +- .../compat/ops_history_v2/ParseTensor.pbtxt | 2 +- .../ops_history_v2/PartitionedCall.pbtxt | 2 +- .../compat/ops_history_v2/Placeholder.pbtxt | 2 +- .../compat/ops_history_v2/PlaceholderV2.pbtxt | 2 +- .../PlaceholderWithDefault.pbtxt | 2 +- .../ops/compat/ops_history_v2/Polygamma.pbtxt | 2 +- .../ops_history_v2/PopulationCount.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Pow.pbtxt | 2 +- .../ops_history_v2/PrefetchDataset.pbtxt | 2 +- .../compat/ops_history_v2/Prelinearize.pbtxt | 2 +- .../ops_history_v2/PrelinearizeTuple.pbtxt | 2 +- .../ops_history_v2/PreventGradient.pbtxt | 2 +- .../ops/compat/ops_history_v2/Print.pbtxt | 2 +- .../ops/compat/ops_history_v2/PrintV2.pbtxt | 2 +- .../compat/ops_history_v2/PriorityQueue.pbtxt | 2 +- .../ops_history_v2/PriorityQueueV2.pbtxt | 2 +- .../PrivateThreadPoolDataset.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Prod.pbtxt | 2 +- .../ops/compat/ops_history_v2/PyFunc.pbtxt | 2 +- .../ops_history_v2/PyFuncStateless.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Qr.pbtxt | 2 +- .../QuantizeAndDequantize.pbtxt | 2 +- .../QuantizeAndDequantizeV2.pbtxt | 2 +- .../QuantizeAndDequantizeV3.pbtxt | 2 +- .../QuantizeAndDequantizeV4.pbtxt | 2 +- .../QuantizeAndDequantizeV4Grad.pbtxt | 2 +- .../QuantizeDownAndShrinkRange.pbtxt | 2 +- .../compat/ops_history_v2/QuantizeV2.pbtxt | 2 +- .../compat/ops_history_v2/QuantizedAdd.pbtxt | 2 +- .../ops_history_v2/QuantizedAvgPool.pbtxt | 2 +- ...izedBatchNormWithGlobalNormalization.pbtxt | 2 +- .../ops_history_v2/QuantizedBiasAdd.pbtxt | 2 +- .../ops_history_v2/QuantizedConcat.pbtxt | 2 +- .../ops_history_v2/QuantizedConv2D.pbtxt | 2 +- .../QuantizedConv2DAndRelu.pbtxt | 2 +- .../QuantizedConv2DAndReluAndRequantize.pbtxt | 2 +- .../QuantizedConv2DAndRequantize.pbtxt | 2 +- .../QuantizedConv2DPerChannel.pbtxt | 2 +- .../QuantizedConv2DWithBias.pbtxt | 2 +- .../QuantizedConv2DWithBiasAndRelu.pbtxt | 2 +- ...edConv2DWithBiasAndReluAndRequantize.pbtxt | 2 +- ...QuantizedConv2DWithBiasAndRequantize.pbtxt | 2 +- ...ithBiasSignedSumAndReluAndRequantize.pbtxt | 2 +- .../QuantizedConv2DWithBiasSumAndRelu.pbtxt | 2 +- ...onv2DWithBiasSumAndReluAndRequantize.pbtxt | 2 +- .../QuantizedDepthwiseConv2D.pbtxt | 2 
+- .../QuantizedDepthwiseConv2DWithBias.pbtxt | 2 +- ...ntizedDepthwiseConv2DWithBiasAndRelu.pbtxt | 2 +- ...seConv2DWithBiasAndReluAndRequantize.pbtxt | 2 +- .../QuantizedInstanceNorm.pbtxt | 2 +- .../ops_history_v2/QuantizedMatMul.pbtxt | 2 +- .../QuantizedMatMulWithBias.pbtxt | 2 +- ...QuantizedMatMulWithBiasAndDequantize.pbtxt | 2 +- .../QuantizedMatMulWithBiasAndRelu.pbtxt | 2 +- ...edMatMulWithBiasAndReluAndRequantize.pbtxt | 2 +- ...QuantizedMatMulWithBiasAndRequantize.pbtxt | 2 +- .../ops_history_v2/QuantizedMaxPool.pbtxt | 2 +- .../compat/ops_history_v2/QuantizedMul.pbtxt | 2 +- .../compat/ops_history_v2/QuantizedRelu.pbtxt | 2 +- .../ops_history_v2/QuantizedRelu6.pbtxt | 2 +- .../ops_history_v2/QuantizedReluX.pbtxt | 2 +- .../ops_history_v2/QuantizedReshape.pbtxt | 2 +- .../QuantizedResizeBilinear.pbtxt | 2 +- .../compat/ops_history_v2/QueueClose.pbtxt | 2 +- .../compat/ops_history_v2/QueueCloseV2.pbtxt | 2 +- .../compat/ops_history_v2/QueueDequeue.pbtxt | 2 +- .../ops_history_v2/QueueDequeueMany.pbtxt | 2 +- .../ops_history_v2/QueueDequeueManyV2.pbtxt | 2 +- .../ops_history_v2/QueueDequeueUpTo.pbtxt | 2 +- .../ops_history_v2/QueueDequeueUpToV2.pbtxt | 2 +- .../ops_history_v2/QueueDequeueV2.pbtxt | 2 +- .../compat/ops_history_v2/QueueEnqueue.pbtxt | 2 +- .../ops_history_v2/QueueEnqueueMany.pbtxt | 2 +- .../ops_history_v2/QueueEnqueueManyV2.pbtxt | 2 +- .../ops_history_v2/QueueEnqueueV2.pbtxt | 2 +- .../compat/ops_history_v2/QueueIsClosed.pbtxt | 2 +- .../ops_history_v2/QueueIsClosedV2.pbtxt | 2 +- .../ops/compat/ops_history_v2/QueueSize.pbtxt | 2 +- .../compat/ops_history_v2/QueueSizeV2.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/RFFT.pbtxt | 2 +- .../ops/compat/ops_history_v2/RFFT2D.pbtxt | 2 +- .../ops/compat/ops_history_v2/RFFT3D.pbtxt | 2 +- .../ops/compat/ops_history_v2/RFFTND.pbtxt | 2 +- .../ops/compat/ops_history_v2/RGBToHSV.pbtxt | 2 +- .../ops_history_v2/RaggedBincount.pbtxt | 2 +- .../RaggedCountSparseOutput.pbtxt | 2 +- .../compat/ops_history_v2/RaggedCross.pbtxt | 2 +- .../ops_history_v2/RaggedFillEmptyRows.pbtxt | 2 +- .../RaggedFillEmptyRowsGrad.pbtxt | 2 +- .../compat/ops_history_v2/RaggedGather.pbtxt | 2 +- .../compat/ops_history_v2/RaggedRange.pbtxt | 2 +- .../RaggedTensorFromVariant.pbtxt | 2 +- .../ops_history_v2/RaggedTensorToSparse.pbtxt | 2 +- .../ops_history_v2/RaggedTensorToTensor.pbtxt | 2 +- .../RaggedTensorToVariant.pbtxt | 2 +- .../RaggedTensorToVariantGradient.pbtxt | 2 +- .../compat/ops_history_v2/RandomCrop.pbtxt | 2 +- .../compat/ops_history_v2/RandomDataset.pbtxt | 2 +- .../ops_history_v2/RandomDatasetV2.pbtxt | 2 +- .../compat/ops_history_v2/RandomGamma.pbtxt | 2 +- .../ops_history_v2/RandomGammaGrad.pbtxt | 2 +- .../ops_history_v2/RandomIndexShuffle.pbtxt | 2 +- .../compat/ops_history_v2/RandomPoisson.pbtxt | 2 +- .../ops_history_v2/RandomPoissonV2.pbtxt | 2 +- .../compat/ops_history_v2/RandomShuffle.pbtxt | 2 +- .../ops_history_v2/RandomShuffleQueue.pbtxt | 2 +- .../ops_history_v2/RandomShuffleQueueV2.pbtxt | 2 +- .../ops_history_v2/RandomStandardNormal.pbtxt | 2 +- .../compat/ops_history_v2/RandomUniform.pbtxt | 2 +- .../ops_history_v2/RandomUniformInt.pbtxt | 2 +- .../ops/compat/ops_history_v2/Range.pbtxt | 2 +- .../compat/ops_history_v2/RangeDataset.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Rank.pbtxt | 2 +- .../ops/compat/ops_history_v2/ReadFile.pbtxt | 2 +- .../ops_history_v2/ReadVariableOp.pbtxt | 2 +- .../ReadVariableXlaSplitND.pbtxt | 2 +- .../ReaderNumRecordsProduced.pbtxt | 2 +- .../ReaderNumRecordsProducedV2.pbtxt | 2 +- 
.../ReaderNumWorkUnitsCompleted.pbtxt | 2 +- .../ReaderNumWorkUnitsCompletedV2.pbtxt | 2 +- .../compat/ops_history_v2/ReaderRead.pbtxt | 2 +- .../ops_history_v2/ReaderReadUpTo.pbtxt | 2 +- .../ops_history_v2/ReaderReadUpToV2.pbtxt | 2 +- .../compat/ops_history_v2/ReaderReadV2.pbtxt | 2 +- .../compat/ops_history_v2/ReaderReset.pbtxt | 2 +- .../compat/ops_history_v2/ReaderResetV2.pbtxt | 2 +- .../ops_history_v2/ReaderRestoreState.pbtxt | 2 +- .../ops_history_v2/ReaderRestoreStateV2.pbtxt | 2 +- .../ops_history_v2/ReaderSerializeState.pbtxt | 2 +- .../ReaderSerializeStateV2.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Real.pbtxt | 2 +- .../ops/compat/ops_history_v2/RealDiv.pbtxt | 2 +- .../ops_history_v2/RebatchDataset.pbtxt | 2 +- .../ops_history_v2/RebatchDatasetV2.pbtxt | 2 +- .../compat/ops_history_v2/Reciprocal.pbtxt | 2 +- .../ops_history_v2/ReciprocalGrad.pbtxt | 2 +- .../compat/ops_history_v2/RecordInput.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Recv.pbtxt | 2 +- .../RecvTPUEmbeddingActivations.pbtxt | 2 +- .../compat/ops_history_v2/ReduceDataset.pbtxt | 2 +- .../compat/ops_history_v2/ReduceJoin.pbtxt | 2 +- .../ops/compat/ops_history_v2/RefEnter.pbtxt | 2 +- .../ops/compat/ops_history_v2/RefExit.pbtxt | 2 +- .../compat/ops_history_v2/RefIdentity.pbtxt | 2 +- .../ops/compat/ops_history_v2/RefMerge.pbtxt | 2 +- .../ops_history_v2/RefNextIteration.pbtxt | 2 +- .../ops/compat/ops_history_v2/RefSelect.pbtxt | 2 +- .../ops/compat/ops_history_v2/RefSwitch.pbtxt | 2 +- .../ops_history_v2/RegexFullMatch.pbtxt | 2 +- .../compat/ops_history_v2/RegexReplace.pbtxt | 2 +- .../ops_history_v2/RegisterDataset.pbtxt | 2 +- .../ops_history_v2/RegisterDatasetV2.pbtxt | 2 +- .../ops/compat/ops_history_v2/Relayout.pbtxt | 2 +- .../compat/ops_history_v2/RelayoutLike.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Relu.pbtxt | 2 +- .../ops/compat/ops_history_v2/Relu6.pbtxt | 2 +- .../ops/compat/ops_history_v2/Relu6Grad.pbtxt | 2 +- .../ops/compat/ops_history_v2/ReluGrad.pbtxt | 2 +- .../compat/ops_history_v2/RemoteCall.pbtxt | 2 +- .../compat/ops_history_v2/RepeatDataset.pbtxt | 2 +- .../ops_history_v2/RequantizationRange.pbtxt | 2 +- .../RequantizationRangePerChannel.pbtxt | 2 +- .../compat/ops_history_v2/Requantize.pbtxt | 2 +- .../ops_history_v2/RequantizePerChannel.pbtxt | 2 +- .../ops/compat/ops_history_v2/Reshape.pbtxt | 2 +- .../compat/ops_history_v2/ResizeArea.pbtxt | 2 +- .../compat/ops_history_v2/ResizeBicubic.pbtxt | 2 +- .../ops_history_v2/ResizeBicubicGrad.pbtxt | 2 +- .../ops_history_v2/ResizeBilinear.pbtxt | 2 +- .../ops_history_v2/ResizeBilinearGrad.pbtxt | 2 +- .../ResizeNearestNeighbor.pbtxt | 2 +- .../ResizeNearestNeighborGrad.pbtxt | 2 +- .../ResourceAccumulatorApplyGradient.pbtxt | 2 +- .../ResourceAccumulatorNumAccumulated.pbtxt | 2 +- .../ResourceAccumulatorSetGlobalStep.pbtxt | 2 +- .../ResourceAccumulatorTakeGradient.pbtxt | 2 +- .../ops_history_v2/ResourceApplyAdaMax.pbtxt | 2 +- .../ResourceApplyAdadelta.pbtxt | 2 +- .../ops_history_v2/ResourceApplyAdagrad.pbtxt | 2 +- .../ResourceApplyAdagradDA.pbtxt | 2 +- .../ResourceApplyAdagradV2.pbtxt | 2 +- .../ops_history_v2/ResourceApplyAdam.pbtxt | 2 +- .../ResourceApplyAdamWithAmsgrad.pbtxt | 2 +- .../ops_history_v2/ResourceApplyAddSign.pbtxt | 2 +- .../ResourceApplyCenteredRMSProp.pbtxt | 2 +- .../ops_history_v2/ResourceApplyFtrl.pbtxt | 2 +- .../ops_history_v2/ResourceApplyFtrlV2.pbtxt | 2 +- .../ResourceApplyGradientDescent.pbtxt | 2 +- .../ResourceApplyKerasMomentum.pbtxt | 2 +- .../ResourceApplyMomentum.pbtxt | 2 +- 
.../ResourceApplyPowerSign.pbtxt | 2 +- .../ResourceApplyProximalAdagrad.pbtxt | 2 +- ...ResourceApplyProximalGradientDescent.pbtxt | 2 +- .../ops_history_v2/ResourceApplyRMSProp.pbtxt | 2 +- .../ResourceConditionalAccumulator.pbtxt | 2 +- .../ops_history_v2/ResourceCountUpTo.pbtxt | 2 +- .../ops_history_v2/ResourceGather.pbtxt | 2 +- .../ops_history_v2/ResourceGatherNd.pbtxt | 2 +- .../ops_history_v2/ResourceScatterAdd.pbtxt | 2 +- .../ops_history_v2/ResourceScatterDiv.pbtxt | 2 +- .../ops_history_v2/ResourceScatterMax.pbtxt | 2 +- .../ops_history_v2/ResourceScatterMin.pbtxt | 2 +- .../ops_history_v2/ResourceScatterMul.pbtxt | 2 +- .../ops_history_v2/ResourceScatterNdAdd.pbtxt | 2 +- .../ops_history_v2/ResourceScatterNdMax.pbtxt | 2 +- .../ops_history_v2/ResourceScatterNdMin.pbtxt | 2 +- .../ops_history_v2/ResourceScatterNdSub.pbtxt | 2 +- .../ResourceScatterNdUpdate.pbtxt | 2 +- .../ops_history_v2/ResourceScatterSub.pbtxt | 2 +- .../ResourceScatterUpdate.pbtxt | 2 +- .../ResourceSparseApplyAdadelta.pbtxt | 2 +- .../ResourceSparseApplyAdagrad.pbtxt | 2 +- .../ResourceSparseApplyAdagradDA.pbtxt | 2 +- .../ResourceSparseApplyAdagradV2.pbtxt | 2 +- .../ResourceSparseApplyCenteredRMSProp.pbtxt | 2 +- .../ResourceSparseApplyFtrl.pbtxt | 2 +- .../ResourceSparseApplyFtrlV2.pbtxt | 2 +- .../ResourceSparseApplyKerasMomentum.pbtxt | 2 +- .../ResourceSparseApplyMomentum.pbtxt | 2 +- .../ResourceSparseApplyProximalAdagrad.pbtxt | 2 +- ...ceSparseApplyProximalGradientDescent.pbtxt | 2 +- .../ResourceSparseApplyRMSProp.pbtxt | 2 +- .../ResourceStridedSliceAssign.pbtxt | 2 +- .../ops/compat/ops_history_v2/Restore.pbtxt | 2 +- .../compat/ops_history_v2/RestoreSlice.pbtxt | 2 +- .../ops/compat/ops_history_v2/RestoreV2.pbtxt | 2 +- .../RetrieveAllTPUEmbeddingParameters.pbtxt | 2 +- .../RetrieveTPUEmbeddingADAMParameters.pbtxt | 2 +- ...trieveTPUEmbeddingAdadeltaParameters.pbtxt | 2 +- ...PUEmbeddingAdagradMomentumParameters.pbtxt | 2 +- ...etrieveTPUEmbeddingAdagradParameters.pbtxt | 2 +- ...PUEmbeddingCenteredRMSPropParameters.pbtxt | 2 +- .../RetrieveTPUEmbeddingFTRLParameters.pbtxt | 2 +- ...mbeddingFrequencyEstimatorParameters.pbtxt | 2 +- ...PUEmbeddingMDLAdagradLightParameters.pbtxt | 2 +- ...trieveTPUEmbeddingMomentumParameters.pbtxt | 2 +- ...PUEmbeddingProximalAdagradParameters.pbtxt | 2 +- ...veTPUEmbeddingProximalYogiParameters.pbtxt | 2 +- ...etrieveTPUEmbeddingRMSPropParameters.pbtxt | 2 +- ...gStochasticGradientDescentParameters.pbtxt | 2 +- .../ops/compat/ops_history_v2/Reverse.pbtxt | 2 +- .../ops_history_v2/ReverseSequence.pbtxt | 2 +- .../ops/compat/ops_history_v2/ReverseV2.pbtxt | 2 +- .../ops_history_v2/RewriteDataset.pbtxt | 2 +- .../compat/ops_history_v2/RightShift.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Rint.pbtxt | 2 +- .../ops/compat/ops_history_v2/RiscAbs.pbtxt | 2 +- .../ops/compat/ops_history_v2/RiscAdd.pbtxt | 2 +- .../ops_history_v2/RiscBinaryArithmetic.pbtxt | 2 +- .../ops_history_v2/RiscBinaryComparison.pbtxt | 2 +- .../compat/ops_history_v2/RiscBitcast.pbtxt | 2 +- .../compat/ops_history_v2/RiscBroadcast.pbtxt | 2 +- .../ops/compat/ops_history_v2/RiscCast.pbtxt | 2 +- .../ops/compat/ops_history_v2/RiscCeil.pbtxt | 2 +- .../compat/ops_history_v2/RiscCholesky.pbtxt | 2 +- .../compat/ops_history_v2/RiscConcat.pbtxt | 2 +- .../compat/ops_history_v2/RiscCondition.pbtxt | 2 +- .../ops/compat/ops_history_v2/RiscConv.pbtxt | 2 +- .../ops/compat/ops_history_v2/RiscCos.pbtxt | 2 +- .../ops/compat/ops_history_v2/RiscDiv.pbtxt | 2 +- .../ops/compat/ops_history_v2/RiscDot.pbtxt 
| 2 +- .../ops/compat/ops_history_v2/RiscExp.pbtxt | 2 +- .../ops/compat/ops_history_v2/RiscFft.pbtxt | 2 +- .../ops/compat/ops_history_v2/RiscFloor.pbtxt | 2 +- .../compat/ops_history_v2/RiscGather.pbtxt | 2 +- .../ops/compat/ops_history_v2/RiscImag.pbtxt | 2 +- .../compat/ops_history_v2/RiscIsFinite.pbtxt | 2 +- .../ops/compat/ops_history_v2/RiscLog.pbtxt | 2 +- .../ops_history_v2/RiscLogicalAnd.pbtxt | 2 +- .../ops_history_v2/RiscLogicalNot.pbtxt | 2 +- .../compat/ops_history_v2/RiscLogicalOr.pbtxt | 2 +- .../ops/compat/ops_history_v2/RiscMax.pbtxt | 2 +- .../ops/compat/ops_history_v2/RiscMin.pbtxt | 2 +- .../ops/compat/ops_history_v2/RiscMul.pbtxt | 2 +- .../ops/compat/ops_history_v2/RiscNeg.pbtxt | 2 +- .../ops/compat/ops_history_v2/RiscPad.pbtxt | 2 +- .../ops/compat/ops_history_v2/RiscPool.pbtxt | 2 +- .../ops/compat/ops_history_v2/RiscPow.pbtxt | 2 +- .../ops_history_v2/RiscRandomUniform.pbtxt | 2 +- .../ops/compat/ops_history_v2/RiscReal.pbtxt | 2 +- .../compat/ops_history_v2/RiscReduce.pbtxt | 2 +- .../ops/compat/ops_history_v2/RiscRem.pbtxt | 2 +- .../compat/ops_history_v2/RiscReshape.pbtxt | 2 +- .../compat/ops_history_v2/RiscReverse.pbtxt | 2 +- .../compat/ops_history_v2/RiscScatter.pbtxt | 2 +- .../ops/compat/ops_history_v2/RiscShape.pbtxt | 2 +- .../ops/compat/ops_history_v2/RiscSign.pbtxt | 2 +- .../ops/compat/ops_history_v2/RiscSlice.pbtxt | 2 +- .../ops/compat/ops_history_v2/RiscSort.pbtxt | 2 +- .../compat/ops_history_v2/RiscSqueeze.pbtxt | 2 +- .../ops/compat/ops_history_v2/RiscSub.pbtxt | 2 +- .../compat/ops_history_v2/RiscTranspose.pbtxt | 2 +- .../ops_history_v2/RiscTriangularSolve.pbtxt | 2 +- .../ops/compat/ops_history_v2/RiscUnary.pbtxt | 2 +- .../ops/compat/ops_history_v2/RiscWhile.pbtxt | 2 +- .../ops_history_v2/RngReadAndSkip.pbtxt | 2 +- .../ops/compat/ops_history_v2/RngSkip.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Roll.pbtxt | 2 +- .../ops/compat/ops_history_v2/Round.pbtxt | 2 +- .../ops/compat/ops_history_v2/Rsqrt.pbtxt | 2 +- .../ops/compat/ops_history_v2/RsqrtGrad.pbtxt | 2 +- .../SampleDistortedBoundingBox.pbtxt | 2 +- .../SampleDistortedBoundingBoxV2.pbtxt | 2 +- .../ops_history_v2/SamplingDataset.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Save.pbtxt | 2 +- .../compat/ops_history_v2/SaveDataset.pbtxt | 2 +- .../compat/ops_history_v2/SaveDatasetV2.pbtxt | 2 +- .../compat/ops_history_v2/SaveSlices.pbtxt | 2 +- .../ops/compat/ops_history_v2/SaveV2.pbtxt | 2 +- .../compat/ops_history_v2/ScalarSummary.pbtxt | 2 +- .../ops_history_v2/ScaleAndTranslate.pbtxt | 2 +- .../ScaleAndTranslateGrad.pbtxt | 2 +- .../compat/ops_history_v2/ScanDataset.pbtxt | 2 +- .../compat/ops_history_v2/ScatterAdd.pbtxt | 2 +- .../compat/ops_history_v2/ScatterDiv.pbtxt | 2 +- .../compat/ops_history_v2/ScatterMax.pbtxt | 2 +- .../compat/ops_history_v2/ScatterMin.pbtxt | 2 +- .../compat/ops_history_v2/ScatterMul.pbtxt | 2 +- .../ops/compat/ops_history_v2/ScatterNd.pbtxt | 2 +- .../compat/ops_history_v2/ScatterNdAdd.pbtxt | 2 +- .../compat/ops_history_v2/ScatterNdMax.pbtxt | 2 +- .../compat/ops_history_v2/ScatterNdMin.pbtxt | 2 +- .../ScatterNdNonAliasingAdd.pbtxt | 2 +- .../compat/ops_history_v2/ScatterNdSub.pbtxt | 2 +- .../ops_history_v2/ScatterNdUpdate.pbtxt | 2 +- .../compat/ops_history_v2/ScatterSub.pbtxt | 2 +- .../compat/ops_history_v2/ScatterUpdate.pbtxt | 2 +- .../compat/ops_history_v2/SdcaFprint.pbtxt | 2 +- .../compat/ops_history_v2/SdcaOptimizer.pbtxt | 2 +- .../ops_history_v2/SdcaOptimizerV2.pbtxt | 2 +- .../compat/ops_history_v2/SdcaShrinkL1.pbtxt | 2 +- 
.../compat/ops_history_v2/SegmentMax.pbtxt | 2 +- .../compat/ops_history_v2/SegmentMaxV2.pbtxt | 2 +- .../compat/ops_history_v2/SegmentMean.pbtxt | 2 +- .../compat/ops_history_v2/SegmentMin.pbtxt | 2 +- .../compat/ops_history_v2/SegmentMinV2.pbtxt | 2 +- .../compat/ops_history_v2/SegmentProd.pbtxt | 2 +- .../compat/ops_history_v2/SegmentProdV2.pbtxt | 2 +- .../compat/ops_history_v2/SegmentSum.pbtxt | 2 +- .../compat/ops_history_v2/SegmentSumV2.pbtxt | 2 +- .../ops/compat/ops_history_v2/Select.pbtxt | 2 +- .../ops/compat/ops_history_v2/SelectV2.pbtxt | 2 +- .../ops_history_v2/SelfAdjointEig.pbtxt | 2 +- .../ops_history_v2/SelfAdjointEigV2.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Selu.pbtxt | 2 +- .../ops/compat/ops_history_v2/SeluGrad.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Send.pbtxt | 2 +- .../SendTPUEmbeddingGradients.pbtxt | 2 +- .../ops_history_v2/SerializeIterator.pbtxt | 2 +- .../ops_history_v2/SerializeManySparse.pbtxt | 2 +- .../ops_history_v2/SerializeSparse.pbtxt | 2 +- .../ops_history_v2/SerializeTensor.pbtxt | 2 +- .../ops/compat/ops_history_v2/SetSize.pbtxt | 2 +- .../SetStatsAggregatorDataset.pbtxt | 2 +- .../ops/compat/ops_history_v2/Shape.pbtxt | 2 +- .../ops/compat/ops_history_v2/ShapeN.pbtxt | 2 +- .../compat/ops_history_v2/ShardDataset.pbtxt | 2 +- .../ops_history_v2/ShardedFilename.pbtxt | 2 +- .../ops_history_v2/ShardedFilespec.pbtxt | 2 +- .../ShuffleAndRepeatDataset.pbtxt | 2 +- .../ShuffleAndRepeatDatasetV2.pbtxt | 2 +- .../ops_history_v2/ShuffleDataset.pbtxt | 2 +- .../ops_history_v2/ShuffleDatasetV2.pbtxt | 2 +- .../ops_history_v2/ShuffleDatasetV3.pbtxt | 2 +- .../ShutdownDistributedTPU.pbtxt | 2 +- .../ops_history_v2/ShutdownTPUSystem.pbtxt | 2 +- .../ops/compat/ops_history_v2/Sigmoid.pbtxt | 2 +- .../compat/ops_history_v2/SigmoidGrad.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Sign.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Sin.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Sinh.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Size.pbtxt | 2 +- .../compat/ops_history_v2/SkipDataset.pbtxt | 2 +- .../ops/compat/ops_history_v2/Skipgram.pbtxt | 2 +- .../compat/ops_history_v2/SleepDataset.pbtxt | 2 +- .../ops/compat/ops_history_v2/Slice.pbtxt | 2 +- .../ops_history_v2/SlidingWindowDataset.pbtxt | 2 +- .../ops/compat/ops_history_v2/Snapshot.pbtxt | 2 +- .../ops_history_v2/SnapshotChunkDataset.pbtxt | 2 +- .../ops_history_v2/SnapshotDataset.pbtxt | 2 +- .../SnapshotDatasetReader.pbtxt | 2 +- .../ops_history_v2/SnapshotDatasetV2.pbtxt | 2 +- .../SnapshotNestedDatasetReader.pbtxt | 2 +- .../compat/ops_history_v2/SobolSample.pbtxt | 2 +- .../ops/compat/ops_history_v2/Softmax.pbtxt | 2 +- .../SoftmaxCrossEntropyWithLogits.pbtxt | 2 +- .../ops/compat/ops_history_v2/Softplus.pbtxt | 2 +- .../compat/ops_history_v2/SoftplusGrad.pbtxt | 2 +- .../ops/compat/ops_history_v2/Softsign.pbtxt | 2 +- .../compat/ops_history_v2/SoftsignGrad.pbtxt | 2 +- .../SortListOfSparseCoreCooTensors.pbtxt | 2 +- .../compat/ops_history_v2/SpaceToBatch.pbtxt | 2 +- .../ops_history_v2/SpaceToBatchND.pbtxt | 2 +- .../compat/ops_history_v2/SpaceToDepth.pbtxt | 2 +- .../SparseAccumulatorApplyGradient.pbtxt | 2 +- .../SparseAccumulatorTakeGradient.pbtxt | 2 +- .../ops/compat/ops_history_v2/SparseAdd.pbtxt | 2 +- .../compat/ops_history_v2/SparseAddGrad.pbtxt | 2 +- .../ops_history_v2/SparseApplyAdadelta.pbtxt | 2 +- .../ops_history_v2/SparseApplyAdagrad.pbtxt | 2 +- .../ops_history_v2/SparseApplyAdagradDA.pbtxt | 2 +- .../ops_history_v2/SparseApplyAdagradV2.pbtxt | 2 +- 
.../SparseApplyCenteredRMSProp.pbtxt | 2 +- .../ops_history_v2/SparseApplyFtrl.pbtxt | 2 +- .../ops_history_v2/SparseApplyFtrlV2.pbtxt | 2 +- .../ops_history_v2/SparseApplyMomentum.pbtxt | 2 +- .../SparseApplyProximalAdagrad.pbtxt | 2 +- .../SparseApplyProximalGradientDescent.pbtxt | 2 +- .../ops_history_v2/SparseApplyRMSProp.pbtxt | 2 +- .../ops_history_v2/SparseBincount.pbtxt | 2 +- .../compat/ops_history_v2/SparseConcat.pbtxt | 2 +- .../SparseConditionalAccumulator.pbtxt | 2 +- .../SparseCountSparseOutput.pbtxt | 2 +- .../compat/ops_history_v2/SparseCross.pbtxt | 2 +- .../ops_history_v2/SparseCrossHashed.pbtxt | 2 +- .../compat/ops_history_v2/SparseCrossV2.pbtxt | 2 +- .../ops_history_v2/SparseDenseCwiseAdd.pbtxt | 2 +- .../ops_history_v2/SparseDenseCwiseDiv.pbtxt | 2 +- .../ops_history_v2/SparseDenseCwiseMul.pbtxt | 2 +- .../ops_history_v2/SparseFillEmptyRows.pbtxt | 2 +- .../SparseFillEmptyRowsGrad.pbtxt | 2 +- .../compat/ops_history_v2/SparseMatMul.pbtxt | 2 +- .../ops_history_v2/SparseMatrixAdd.pbtxt | 2 +- .../ops_history_v2/SparseMatrixMatMul.pbtxt | 2 +- .../ops_history_v2/SparseMatrixMul.pbtxt | 2 +- .../ops_history_v2/SparseMatrixNNZ.pbtxt | 2 +- .../SparseMatrixOrderingAMD.pbtxt | 2 +- .../ops_history_v2/SparseMatrixSoftmax.pbtxt | 2 +- .../SparseMatrixSoftmaxGrad.pbtxt | 2 +- .../SparseMatrixSparseCholesky.pbtxt | 2 +- .../SparseMatrixSparseMatMul.pbtxt | 2 +- .../SparseMatrixTranspose.pbtxt | 2 +- .../ops_history_v2/SparseMatrixZeros.pbtxt | 2 +- .../ops_history_v2/SparseReduceMax.pbtxt | 2 +- .../SparseReduceMaxSparse.pbtxt | 2 +- .../ops_history_v2/SparseReduceSum.pbtxt | 2 +- .../SparseReduceSumSparse.pbtxt | 2 +- .../compat/ops_history_v2/SparseReorder.pbtxt | 2 +- .../compat/ops_history_v2/SparseReshape.pbtxt | 2 +- .../ops_history_v2/SparseSegmentMean.pbtxt | 2 +- .../SparseSegmentMeanGrad.pbtxt | 2 +- .../SparseSegmentMeanGradV2.pbtxt | 2 +- .../SparseSegmentMeanWithNumSegments.pbtxt | 2 +- .../ops_history_v2/SparseSegmentSqrtN.pbtxt | 2 +- .../SparseSegmentSqrtNGrad.pbtxt | 2 +- .../SparseSegmentSqrtNGradV2.pbtxt | 2 +- .../SparseSegmentSqrtNWithNumSegments.pbtxt | 2 +- .../ops_history_v2/SparseSegmentSum.pbtxt | 2 +- .../ops_history_v2/SparseSegmentSumGrad.pbtxt | 2 +- .../SparseSegmentSumGradV2.pbtxt | 2 +- .../SparseSegmentSumWithNumSegments.pbtxt | 2 +- .../compat/ops_history_v2/SparseSlice.pbtxt | 2 +- .../ops_history_v2/SparseSliceGrad.pbtxt | 2 +- .../compat/ops_history_v2/SparseSoftmax.pbtxt | 2 +- .../SparseSoftmaxCrossEntropyWithLogits.pbtxt | 2 +- .../ops_history_v2/SparseSparseMaximum.pbtxt | 2 +- .../ops_history_v2/SparseSparseMinimum.pbtxt | 2 +- .../compat/ops_history_v2/SparseSplit.pbtxt | 2 +- .../ops_history_v2/SparseTensorDenseAdd.pbtxt | 2 +- .../SparseTensorDenseMatMul.pbtxt | 2 +- .../SparseTensorSliceDataset.pbtxt | 2 +- .../SparseTensorToCSRSparseMatrix.pbtxt | 2 +- .../compat/ops_history_v2/SparseToDense.pbtxt | 2 +- .../SparseToSparseSetOperation.pbtxt | 2 +- .../ops/compat/ops_history_v2/Spence.pbtxt | 2 +- .../ops/compat/ops_history_v2/Split.pbtxt | 2 +- .../ops_history_v2/SplitDedupData.pbtxt | 2 +- .../ops/compat/ops_history_v2/SplitV.pbtxt | 2 +- .../compat/ops_history_v2/SqlDataset.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Sqrt.pbtxt | 2 +- .../ops/compat/ops_history_v2/SqrtGrad.pbtxt | 2 +- .../ops/compat/ops_history_v2/Square.pbtxt | 2 +- .../ops_history_v2/SquaredDifference.pbtxt | 2 +- .../ops/compat/ops_history_v2/Squeeze.pbtxt | 2 +- .../ops/compat/ops_history_v2/Stack.pbtxt | 2 +- .../compat/ops_history_v2/StackClose.pbtxt 
| 2 +- .../compat/ops_history_v2/StackCloseV2.pbtxt | 2 +- .../ops/compat/ops_history_v2/StackPop.pbtxt | 2 +- .../compat/ops_history_v2/StackPopV2.pbtxt | 2 +- .../ops/compat/ops_history_v2/StackPush.pbtxt | 2 +- .../compat/ops_history_v2/StackPushV2.pbtxt | 2 +- .../ops/compat/ops_history_v2/StackV2.pbtxt | 2 +- .../ops/compat/ops_history_v2/Stage.pbtxt | 2 +- .../compat/ops_history_v2/StageClear.pbtxt | 2 +- .../ops/compat/ops_history_v2/StagePeek.pbtxt | 2 +- .../ops/compat/ops_history_v2/StageSize.pbtxt | 2 +- .../StatefulPartitionedCall.pbtxt | 2 +- .../StatefulRandomBinomial.pbtxt | 2 +- .../StatefulStandardNormal.pbtxt | 2 +- .../StatefulStandardNormalV2.pbtxt | 2 +- .../StatefulTruncatedNormal.pbtxt | 2 +- .../ops_history_v2/StatefulUniform.pbtxt | 2 +- .../StatefulUniformFullInt.pbtxt | 2 +- .../ops_history_v2/StatefulUniformInt.pbtxt | 2 +- .../compat/ops_history_v2/StatelessCase.pbtxt | 2 +- .../compat/ops_history_v2/StatelessIf.pbtxt | 2 +- .../ops_history_v2/StatelessMultinomial.pbtxt | 2 +- ...tatelessParameterizedTruncatedNormal.pbtxt | 2 +- .../StatelessRandomBinomial.pbtxt | 2 +- .../StatelessRandomGammaV2.pbtxt | 2 +- .../StatelessRandomGammaV3.pbtxt | 2 +- .../StatelessRandomGetAlg.pbtxt | 2 +- .../StatelessRandomGetKeyCounter.pbtxt | 2 +- .../StatelessRandomGetKeyCounterAlg.pbtxt | 2 +- .../StatelessRandomNormal.pbtxt | 2 +- .../StatelessRandomNormalV2.pbtxt | 2 +- .../StatelessRandomPoisson.pbtxt | 2 +- .../StatelessRandomUniform.pbtxt | 2 +- .../StatelessRandomUniformFullInt.pbtxt | 2 +- .../StatelessRandomUniformFullIntV2.pbtxt | 2 +- .../StatelessRandomUniformInt.pbtxt | 2 +- .../StatelessRandomUniformIntV2.pbtxt | 2 +- .../StatelessRandomUniformV2.pbtxt | 2 +- .../StatelessSampleDistortedBoundingBox.pbtxt | 2 +- .../ops_history_v2/StatelessShuffle.pbtxt | 2 +- .../StatelessTruncatedNormal.pbtxt | 2 +- .../StatelessTruncatedNormalV2.pbtxt | 2 +- .../ops_history_v2/StatelessWhile.pbtxt | 2 +- .../ops_history_v2/StaticRegexFullMatch.pbtxt | 2 +- .../ops_history_v2/StaticRegexReplace.pbtxt | 2 +- .../StatsAggregatorHandle.pbtxt | 2 +- .../StatsAggregatorHandleV2.pbtxt | 2 +- .../StatsAggregatorSetSummaryWriter.pbtxt | 2 +- .../StatsAggregatorSummary.pbtxt | 2 +- .../ops_history_v2/StochasticCastToInt.pbtxt | 2 +- .../compat/ops_history_v2/StopGradient.pbtxt | 2 +- .../StoreMinibatchStatisticsInFdo.pbtxt | 2 +- .../compat/ops_history_v2/StridedSlice.pbtxt | 2 +- .../ops_history_v2/StridedSliceAssign.pbtxt | 2 +- .../ops_history_v2/StridedSliceGrad.pbtxt | 2 +- .../compat/ops_history_v2/StringFormat.pbtxt | 2 +- .../compat/ops_history_v2/StringJoin.pbtxt | 2 +- .../compat/ops_history_v2/StringLength.pbtxt | 2 +- .../compat/ops_history_v2/StringLower.pbtxt | 2 +- .../compat/ops_history_v2/StringNGrams.pbtxt | 2 +- .../compat/ops_history_v2/StringSplit.pbtxt | 2 +- .../compat/ops_history_v2/StringSplitV2.pbtxt | 2 +- .../compat/ops_history_v2/StringStrip.pbtxt | 2 +- .../ops_history_v2/StringToHashBucket.pbtxt | 2 +- .../StringToHashBucketFast.pbtxt | 2 +- .../StringToHashBucketStrong.pbtxt | 2 +- .../ops_history_v2/StringToNumber.pbtxt | 30 ++++++++++++++++++- .../compat/ops_history_v2/StringUpper.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Sub.pbtxt | 2 +- .../ops/compat/ops_history_v2/Substr.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Sum.pbtxt | 2 +- .../compat/ops_history_v2/SummaryWriter.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Svd.pbtxt | 2 +- .../ops/compat/ops_history_v2/Switch.pbtxt | 2 +- .../ops_history_v2/SymbolicGradient.pbtxt | 2 +- 
.../compat/ops_history_v2/SyncDevice.pbtxt | 2 +- .../ops_history_v2/TFRecordDataset.pbtxt | 2 +- .../ops_history_v2/TFRecordDatasetV2.pbtxt | 2 +- .../ops_history_v2/TFRecordReader.pbtxt | 2 +- .../ops_history_v2/TFRecordReaderV2.pbtxt | 2 +- .../TPUAnnotateTensorsWithDynamicShape.pbtxt | 2 +- .../ops_history_v2/TPUCompilationResult.pbtxt | 2 +- .../compat/ops_history_v2/TPUCompile.pbtxt | 2 +- .../TPUCompileSucceededAssert.pbtxt | 2 +- .../TPUCopyWithDynamicShape.pbtxt | 2 +- .../TPUEmbeddingActivations.pbtxt | 2 +- .../compat/ops_history_v2/TPUExecute.pbtxt | 2 +- .../TPUExecuteAndUpdateVariables.pbtxt | 2 +- .../ops_history_v2/TPUOrdinalSelector.pbtxt | 2 +- .../ops_history_v2/TPUPartitionedCall.pbtxt | 2 +- .../ops_history_v2/TPUPartitionedInput.pbtxt | 2 +- .../TPUPartitionedInputV2.pbtxt | 2 +- .../ops_history_v2/TPUPartitionedOutput.pbtxt | 2 +- .../TPUPartitionedOutputV2.pbtxt | 2 +- .../ops_history_v2/TPUReplicateMetadata.pbtxt | 2 +- .../ops_history_v2/TPUReplicatedInput.pbtxt | 2 +- .../ops_history_v2/TPUReplicatedOutput.pbtxt | 2 +- .../ops_history_v2/TPUReshardVariables.pbtxt | 2 +- .../compat/ops_history_v2/TPURoundRobin.pbtxt | 2 +- .../compat/ops_history_v2/TakeDataset.pbtxt | 2 +- .../TakeManySparseFromTensorsMap.pbtxt | 2 +- .../ops_history_v2/TakeWhileDataset.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Tan.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Tanh.pbtxt | 2 +- .../ops/compat/ops_history_v2/TanhGrad.pbtxt | 2 +- .../ops_history_v2/TemporaryVariable.pbtxt | 2 +- .../compat/ops_history_v2/TensorArray.pbtxt | 2 +- .../ops_history_v2/TensorArrayClose.pbtxt | 2 +- .../ops_history_v2/TensorArrayCloseV2.pbtxt | 2 +- .../ops_history_v2/TensorArrayCloseV3.pbtxt | 2 +- .../ops_history_v2/TensorArrayConcat.pbtxt | 2 +- .../ops_history_v2/TensorArrayConcatV2.pbtxt | 2 +- .../ops_history_v2/TensorArrayConcatV3.pbtxt | 2 +- .../ops_history_v2/TensorArrayGather.pbtxt | 2 +- .../ops_history_v2/TensorArrayGatherV2.pbtxt | 2 +- .../ops_history_v2/TensorArrayGatherV3.pbtxt | 2 +- .../ops_history_v2/TensorArrayGrad.pbtxt | 2 +- .../ops_history_v2/TensorArrayGradV2.pbtxt | 2 +- .../ops_history_v2/TensorArrayGradV3.pbtxt | 2 +- .../TensorArrayGradWithShape.pbtxt | 2 +- .../ops_history_v2/TensorArrayPack.pbtxt | 2 +- .../ops_history_v2/TensorArrayRead.pbtxt | 2 +- .../ops_history_v2/TensorArrayReadV2.pbtxt | 2 +- .../ops_history_v2/TensorArrayReadV3.pbtxt | 2 +- .../ops_history_v2/TensorArrayScatter.pbtxt | 2 +- .../ops_history_v2/TensorArrayScatterV2.pbtxt | 2 +- .../ops_history_v2/TensorArrayScatterV3.pbtxt | 2 +- .../ops_history_v2/TensorArraySize.pbtxt | 2 +- .../ops_history_v2/TensorArraySizeV2.pbtxt | 2 +- .../ops_history_v2/TensorArraySizeV3.pbtxt | 2 +- .../ops_history_v2/TensorArraySplit.pbtxt | 2 +- .../ops_history_v2/TensorArraySplitV2.pbtxt | 2 +- .../ops_history_v2/TensorArraySplitV3.pbtxt | 2 +- .../ops_history_v2/TensorArrayUnpack.pbtxt | 2 +- .../compat/ops_history_v2/TensorArrayV2.pbtxt | 2 +- .../compat/ops_history_v2/TensorArrayV3.pbtxt | 2 +- .../ops_history_v2/TensorArrayWrite.pbtxt | 2 +- .../ops_history_v2/TensorArrayWriteV2.pbtxt | 2 +- .../ops_history_v2/TensorArrayWriteV3.pbtxt | 2 +- .../compat/ops_history_v2/TensorDataset.pbtxt | 2 +- .../ops_history_v2/TensorListConcat.pbtxt | 2 +- .../TensorListConcatLists.pbtxt | 2 +- .../ops_history_v2/TensorListConcatV2.pbtxt | 2 +- .../TensorListElementShape.pbtxt | 2 +- .../ops_history_v2/TensorListFromTensor.pbtxt | 2 +- .../ops_history_v2/TensorListGather.pbtxt | 2 +- 
.../ops_history_v2/TensorListGetItem.pbtxt | 2 +- .../ops_history_v2/TensorListLength.pbtxt | 2 +- .../ops_history_v2/TensorListPopBack.pbtxt | 2 +- .../ops_history_v2/TensorListPushBack.pbtxt | 2 +- .../TensorListPushBackBatch.pbtxt | 2 +- .../ops_history_v2/TensorListReserve.pbtxt | 2 +- .../ops_history_v2/TensorListResize.pbtxt | 2 +- .../ops_history_v2/TensorListScatter.pbtxt | 2 +- .../TensorListScatterIntoExistingList.pbtxt | 2 +- .../ops_history_v2/TensorListScatterV2.pbtxt | 2 +- .../ops_history_v2/TensorListSetItem.pbtxt | 2 +- .../ops_history_v2/TensorListSplit.pbtxt | 2 +- .../ops_history_v2/TensorListStack.pbtxt | 2 +- .../ops_history_v2/TensorMapErase.pbtxt | 2 +- .../ops_history_v2/TensorMapHasKey.pbtxt | 2 +- .../ops_history_v2/TensorMapInsert.pbtxt | 2 +- .../ops_history_v2/TensorMapLookup.pbtxt | 2 +- .../compat/ops_history_v2/TensorMapSize.pbtxt | 2 +- .../ops_history_v2/TensorMapStackKeys.pbtxt | 2 +- .../ops_history_v2/TensorScatterAdd.pbtxt | 2 +- .../ops_history_v2/TensorScatterMax.pbtxt | 2 +- .../ops_history_v2/TensorScatterMin.pbtxt | 2 +- .../ops_history_v2/TensorScatterSub.pbtxt | 2 +- .../ops_history_v2/TensorScatterUpdate.pbtxt | 2 +- .../ops_history_v2/TensorSliceDataset.pbtxt | 2 +- .../TensorStridedSliceUpdate.pbtxt | 2 +- .../compat/ops_history_v2/TensorSummary.pbtxt | 2 +- .../ops_history_v2/TensorSummaryV2.pbtxt | 2 +- .../ops_history_v2/TextLineDataset.pbtxt | 2 +- .../ops_history_v2/TextLineReader.pbtxt | 2 +- .../ops_history_v2/TextLineReaderV2.pbtxt | 2 +- .../ops_history_v2/ThreadPoolDataset.pbtxt | 2 +- .../ops_history_v2/ThreadPoolHandle.pbtxt | 2 +- .../ThreadUnsafeUnigramCandidateSampler.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Tile.pbtxt | 2 +- .../ops/compat/ops_history_v2/TileGrad.pbtxt | 2 +- .../ops/compat/ops_history_v2/Timestamp.pbtxt | 2 +- .../ops/compat/ops_history_v2/ToBool.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/TopK.pbtxt | 2 +- .../compat/ops_history_v2/TopKUnique.pbtxt | 2 +- .../ops/compat/ops_history_v2/TopKV2.pbtxt | 2 +- .../ops_history_v2/TopKWithUnique.pbtxt | 2 +- .../ops_history_v2/TpuHandleToProtoKey.pbtxt | 2 +- .../ops/compat/ops_history_v2/Transpose.pbtxt | 2 +- .../ops_history_v2/TridiagonalMatMul.pbtxt | 2 +- .../ops_history_v2/TridiagonalSolve.pbtxt | 2 +- .../compat/ops_history_v2/TruncateDiv.pbtxt | 2 +- .../compat/ops_history_v2/TruncateMod.pbtxt | 2 +- .../ops_history_v2/TruncatedNormal.pbtxt | 2 +- .../ops/compat/ops_history_v2/Unbatch.pbtxt | 2 +- .../ops_history_v2/UnbatchDataset.pbtxt | 2 +- .../compat/ops_history_v2/UnbatchGrad.pbtxt | 2 +- .../ops_history_v2/UncompressElement.pbtxt | 2 +- .../compat/ops_history_v2/UnicodeDecode.pbtxt | 2 +- .../UnicodeDecodeWithOffsets.pbtxt | 2 +- .../compat/ops_history_v2/UnicodeEncode.pbtxt | 2 +- .../compat/ops_history_v2/UnicodeScript.pbtxt | 2 +- .../ops_history_v2/UnicodeTranscode.pbtxt | 2 +- .../UniformCandidateSampler.pbtxt | 2 +- .../ops_history_v2/UniformDequantize.pbtxt | 2 +- .../ops_history_v2/UniformQuantize.pbtxt | 2 +- .../ops_history_v2/UniformQuantizedAdd.pbtxt | 2 +- .../UniformQuantizedClipByValue.pbtxt | 2 +- .../UniformQuantizedConvolution.pbtxt | 2 +- .../UniformQuantizedConvolutionHybrid.pbtxt | 2 +- .../ops_history_v2/UniformQuantizedDot.pbtxt | 2 +- .../UniformQuantizedDotHybrid.pbtxt | 2 +- .../ops_history_v2/UniformRequantize.pbtxt | 2 +- .../ops/compat/ops_history_v2/Unique.pbtxt | 2 +- .../compat/ops_history_v2/UniqueDataset.pbtxt | 2 +- .../ops/compat/ops_history_v2/UniqueV2.pbtxt | 2 +- 
.../ops_history_v2/UniqueWithCounts.pbtxt | 2 +- .../ops_history_v2/UniqueWithCountsV2.pbtxt | 2 +- .../ops/compat/ops_history_v2/Unpack.pbtxt | 2 +- .../compat/ops_history_v2/UnravelIndex.pbtxt | 2 +- .../ops_history_v2/UnsortedSegmentJoin.pbtxt | 2 +- .../ops_history_v2/UnsortedSegmentMax.pbtxt | 2 +- .../ops_history_v2/UnsortedSegmentMin.pbtxt | 2 +- .../ops_history_v2/UnsortedSegmentProd.pbtxt | 2 +- .../ops_history_v2/UnsortedSegmentSum.pbtxt | 2 +- .../ops/compat/ops_history_v2/Unstage.pbtxt | 2 +- .../ops_history_v2/UnwrapDatasetVariant.pbtxt | 2 +- .../compat/ops_history_v2/UpperBound.pbtxt | 2 +- .../compat/ops_history_v2/VarHandleOp.pbtxt | 2 +- .../ops_history_v2/VarIsInitializedOp.pbtxt | 2 +- .../ops/compat/ops_history_v2/Variable.pbtxt | 2 +- .../compat/ops_history_v2/VariableShape.pbtxt | 2 +- .../compat/ops_history_v2/VariableV2.pbtxt | 2 +- .../WeightedFlatMapDataset.pbtxt | 2 +- .../ops/compat/ops_history_v2/Where.pbtxt | 2 +- .../ops/compat/ops_history_v2/While.pbtxt | 2 +- .../ops_history_v2/WholeFileReader.pbtxt | 2 +- .../ops_history_v2/WholeFileReaderV2.pbtxt | 2 +- .../compat/ops_history_v2/WindowDataset.pbtxt | 2 +- .../ops/compat/ops_history_v2/WindowOp.pbtxt | 2 +- .../ops_history_v2/WorkerHeartbeat.pbtxt | 2 +- .../ops_history_v2/WrapDatasetVariant.pbtxt | 2 +- .../ops_history_v2/WriteAudioSummary.pbtxt | 2 +- .../ops/compat/ops_history_v2/WriteFile.pbtxt | 2 +- .../ops_history_v2/WriteGraphSummary.pbtxt | 2 +- .../WriteHistogramSummary.pbtxt | 2 +- .../ops_history_v2/WriteImageSummary.pbtxt | 2 +- .../ops_history_v2/WriteRawProtoSummary.pbtxt | 2 +- .../ops_history_v2/WriteScalarSummary.pbtxt | 2 +- .../compat/ops_history_v2/WriteSummary.pbtxt | 2 +- .../ops/compat/ops_history_v2/Xdivy.pbtxt | 2 +- .../compat/ops_history_v2/XlaConcatND.pbtxt | 2 +- .../ops_history_v2/XlaHostCompute.pbtxt | 2 +- .../ops_history_v2/XlaRecvFromHost.pbtxt | 2 +- .../XlaRecvTPUEmbeddingActivations.pbtxt | 2 +- .../XlaRecvTPUEmbeddingActivationsV2.pbtxt | 2 +- ...XlaRecvTPUEmbeddingDeduplicationData.pbtxt | 2 +- ...aRecvTPUEmbeddingDeduplicationDataV2.pbtxt | 2 +- .../XlaSendTPUEmbeddingGradients.pbtxt | 2 +- .../XlaSendTPUEmbeddingGradientsV2.pbtxt | 2 +- .../compat/ops_history_v2/XlaSendToHost.pbtxt | 2 +- .../ops_history_v2/XlaSparseCoreAdagrad.pbtxt | 2 +- .../XlaSparseCoreAdagradMomentum.pbtxt | 2 +- .../ops_history_v2/XlaSparseCoreAdam.pbtxt | 2 +- .../ops_history_v2/XlaSparseCoreFtrl.pbtxt | 2 +- .../ops_history_v2/XlaSparseCoreSgd.pbtxt | 2 +- .../ops_history_v2/XlaSparseDenseMatmul.pbtxt | 2 +- ...enseMatmulGradWithAdagradAndCsrInput.pbtxt | 2 +- ...ulGradWithAdagradAndStaticBufferSize.pbtxt | 2 +- ...ulGradWithAdagradMomentumAndCsrInput.pbtxt | 2 +- ...thAdagradMomentumAndStaticBufferSize.pbtxt | 2 +- ...seDenseMatmulGradWithAdamAndCsrInput.pbtxt | 2 +- ...atmulGradWithAdamAndStaticBufferSize.pbtxt | 2 +- ...seDenseMatmulGradWithFtrlAndCsrInput.pbtxt | 2 +- ...atmulGradWithFtrlAndStaticBufferSize.pbtxt | 2 +- ...rseDenseMatmulGradWithSgdAndCsrInput.pbtxt | 2 +- ...MatmulGradWithSgdAndStaticBufferSize.pbtxt | 2 +- .../XlaSparseDenseMatmulWithCsrInput.pbtxt | 2 +- ...parseDenseMatmulWithStaticBufferSize.pbtxt | 2 +- .../compat/ops_history_v2/XlaSplitND.pbtxt | 2 +- .../ops/compat/ops_history_v2/Xlog1py.pbtxt | 2 +- .../ops/compat/ops_history_v2/Xlogy.pbtxt | 2 +- .../ops/compat/ops_history_v2/ZerosLike.pbtxt | 2 +- .../core/ops/compat/ops_history_v2/Zeta.pbtxt | 2 +- .../compat/ops_history_v2/ZipDataset.pbtxt | 2 +- tensorflow/core/ops/ops.pbtxt | 4 ++- 1550 files changed, 
1579 insertions(+), 1550 deletions(-)

diff --git a/tensorflow/core/ops/compat/ops_history_v2/Abort.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Abort.pbtxt
index 5d88f788a8cda1..4752385d6ecf6d 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/Abort.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/Abort.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "Abort"
   attr {
     name: "error_msg"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/Abs.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Abs.pbtxt
index e901bc5794f682..5f44f9c6dca631 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/Abs.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/Abs.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "Abs"
   input_arg {
     name: "x"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/AccumulateNV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AccumulateNV2.pbtxt
index 7a44ffd9fda37d..3e94aa154434f3 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/AccumulateNV2.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/AccumulateNV2.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "AccumulateNV2"
   input_arg {
     name: "inputs"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/AccumulatorApplyGradient.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AccumulatorApplyGradient.pbtxt
index e920df37858235..dd1c9870a63327 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/AccumulatorApplyGradient.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/AccumulatorApplyGradient.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "AccumulatorApplyGradient"
   input_arg {
     name: "handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/AccumulatorNumAccumulated.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AccumulatorNumAccumulated.pbtxt
index 131af0c1b5b24a..f378509e1e02f9 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/AccumulatorNumAccumulated.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/AccumulatorNumAccumulated.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "AccumulatorNumAccumulated"
   input_arg {
     name: "handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/AccumulatorSetGlobalStep.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AccumulatorSetGlobalStep.pbtxt
index 0f50ac1852b834..9b4170df332a6a 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/AccumulatorSetGlobalStep.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/AccumulatorSetGlobalStep.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "AccumulatorSetGlobalStep"
   input_arg {
     name: "handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/AccumulatorTakeGradient.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AccumulatorTakeGradient.pbtxt
index de3a79650b8f9d..1e28a68455fbf7 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/AccumulatorTakeGradient.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/AccumulatorTakeGradient.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "AccumulatorTakeGradient"
   input_arg {
     name: "handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/Acos.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Acos.pbtxt
index 577976c6e4c579..504486147b2d30 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/Acos.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/Acos.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "Acos"
   input_arg {
     name: "x"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/Acosh.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Acosh.pbtxt
index 136b9bfb209241..e53c8177f7f2f7 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/Acosh.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/Acosh.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "Acosh"
   input_arg {
     name: "x"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/Add.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Add.pbtxt
index d1f87827922fac..ce30e6d5544d04 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/Add.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/Add.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "Add"
   input_arg {
     name: "x"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/AddManySparseToTensorsMap.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AddManySparseToTensorsMap.pbtxt
index 433a7f40b042e9..c1433ccbaf9e3b 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/AddManySparseToTensorsMap.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/AddManySparseToTensorsMap.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "AddManySparseToTensorsMap"
   input_arg {
     name: "sparse_indices"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/AddN.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AddN.pbtxt
index 106bb1a9a9b7ae..8935c304edc5a8 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/AddN.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/AddN.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "AddN"
   input_arg {
     name: "inputs"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/AddSparseToTensorsMap.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AddSparseToTensorsMap.pbtxt
index 8c226e1965ce41..8a4c020d067b18 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/AddSparseToTensorsMap.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/AddSparseToTensorsMap.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "AddSparseToTensorsMap"
   input_arg {
     name: "sparse_indices"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/AddV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AddV2.pbtxt
index ad79c179db20ab..a2be10b4abe51e 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/AddV2.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/AddV2.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "AddV2"
   input_arg {
     name: "x"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/AdjustContrast.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AdjustContrast.pbtxt
index b2dc5e78e4b4e6..e51900d718bcc6 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/AdjustContrast.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/AdjustContrast.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "AdjustContrast"
   input_arg {
     name: "images"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/AdjustContrastv2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AdjustContrastv2.pbtxt
index 08fc84a5ab0cb0..6869f269dadf87 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/AdjustContrastv2.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/AdjustContrastv2.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "AdjustContrastv2"
   input_arg {
     name: "images"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/AdjustHue.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AdjustHue.pbtxt
index 6cce51abdc8b28..9a6c72d3d8f515 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/AdjustHue.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/AdjustHue.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "AdjustHue"
   input_arg {
     name: "images"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/AdjustSaturation.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AdjustSaturation.pbtxt
index 4bc90aec904be0..918ea188d8523b 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/AdjustSaturation.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/AdjustSaturation.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "AdjustSaturation"
   input_arg {
     name: "images"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/All.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/All.pbtxt
index 0afd8d468145d4..c0bc8f4beae4fc 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/All.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/All.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "All"
   input_arg {
     name: "input"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/AllCandidateSampler.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AllCandidateSampler.pbtxt
index ff93f20c348920..e452850c261223 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/AllCandidateSampler.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/AllCandidateSampler.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "AllCandidateSampler"
   input_arg {
     name: "true_classes"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/AllToAll.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AllToAll.pbtxt
index 23796980cd62fc..005d16aec0d9e6 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/AllToAll.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/AllToAll.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "AllToAll"
   input_arg {
     name: "input"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/Angle.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Angle.pbtxt
index 5a1ff8b86c421c..ce28927f2b8118 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/Angle.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/Angle.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "Angle"
   input_arg {
     name: "input"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/AnonymousHashTable.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AnonymousHashTable.pbtxt
index 1a62f991251223..15826399d357e4 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/AnonymousHashTable.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/AnonymousHashTable.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "AnonymousHashTable"
   output_arg {
     name: "table_handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/AnonymousIterator.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AnonymousIterator.pbtxt
index 8094c8d79d6511..bf8f8fc2ed49d8 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/AnonymousIterator.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/AnonymousIterator.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "AnonymousIterator"
   output_arg {
     name: "handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/AnonymousIteratorV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AnonymousIteratorV2.pbtxt
index 6b6dad7212bfe9..e7dca69e3e041c 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/AnonymousIteratorV2.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/AnonymousIteratorV2.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "AnonymousIteratorV2"
   output_arg {
     name: "handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/AnonymousIteratorV3.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AnonymousIteratorV3.pbtxt
index 328434c6042ce2..15e975a4453c2e 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/AnonymousIteratorV3.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/AnonymousIteratorV3.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "AnonymousIteratorV3"
   output_arg {
     name: "handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/AnonymousMemoryCache.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AnonymousMemoryCache.pbtxt
index b3ab7638e496a9..7f15df3e956212 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/AnonymousMemoryCache.pbtxt
a/tensorflow/core/ops/compat/ops_history_v2/AnonymousMemoryCache.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/AnonymousMemoryCache.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "AnonymousMemoryCache" output_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/AnonymousMultiDeviceIterator.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AnonymousMultiDeviceIterator.pbtxt index a19b45630b5d2f..b8afaa363d6a66 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/AnonymousMultiDeviceIterator.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/AnonymousMultiDeviceIterator.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "AnonymousMultiDeviceIterator" output_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/AnonymousMultiDeviceIteratorV3.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AnonymousMultiDeviceIteratorV3.pbtxt index 20d70a6acc9855..c503bf5c9d3e83 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/AnonymousMultiDeviceIteratorV3.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/AnonymousMultiDeviceIteratorV3.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "AnonymousMultiDeviceIteratorV3" output_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/AnonymousMutableDenseHashTable.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AnonymousMutableDenseHashTable.pbtxt index f14a62b4faa484..6026fd263d36ff 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/AnonymousMutableDenseHashTable.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/AnonymousMutableDenseHashTable.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "AnonymousMutableDenseHashTable" input_arg { name: "empty_key" diff --git a/tensorflow/core/ops/compat/ops_history_v2/AnonymousMutableHashTable.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AnonymousMutableHashTable.pbtxt index b375400621b8ee..6f34858f4d3bd3 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/AnonymousMutableHashTable.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/AnonymousMutableHashTable.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "AnonymousMutableHashTable" output_arg { name: "table_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/AnonymousMutableHashTableOfTensors.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AnonymousMutableHashTableOfTensors.pbtxt index 692d1963adffc8..21b3744a98b973 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/AnonymousMutableHashTableOfTensors.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/AnonymousMutableHashTableOfTensors.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "AnonymousMutableHashTableOfTensors" output_arg { name: "table_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/AnonymousRandomSeedGenerator.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AnonymousRandomSeedGenerator.pbtxt index e805c04147a283..da2558b596ac6b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/AnonymousRandomSeedGenerator.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/AnonymousRandomSeedGenerator.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "AnonymousRandomSeedGenerator" input_arg { name: "seed" diff --git a/tensorflow/core/ops/compat/ops_history_v2/AnonymousSeedGenerator.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AnonymousSeedGenerator.pbtxt index c2b9eee9b6a976..370b0460e22f3c 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/AnonymousSeedGenerator.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/AnonymousSeedGenerator.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "AnonymousSeedGenerator" input_arg { name: "seed" diff 
--git a/tensorflow/core/ops/compat/ops_history_v2/Any.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Any.pbtxt index 9b5d6350b83cfc..da020906a6d358 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Any.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Any.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Any" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ApplyAdaMax.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ApplyAdaMax.pbtxt index fe6e128a18034c..06f6134c329f5a 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ApplyAdaMax.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ApplyAdaMax.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ApplyAdaMax" input_arg { name: "var" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ApplyAdadelta.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ApplyAdadelta.pbtxt index 211c8d4a64efca..477fd7e824e958 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ApplyAdadelta.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ApplyAdadelta.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ApplyAdadelta" input_arg { name: "var" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ApplyAdagrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ApplyAdagrad.pbtxt index 5ede2bc76ea499..3da12f57396533 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ApplyAdagrad.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ApplyAdagrad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ApplyAdagrad" input_arg { name: "var" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ApplyAdagradDA.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ApplyAdagradDA.pbtxt index 3d00cb6f01bc9f..6795b12ea72d2f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ApplyAdagradDA.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ApplyAdagradDA.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ApplyAdagradDA" input_arg { name: "var" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ApplyAdagradV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ApplyAdagradV2.pbtxt index f7673fe102988f..12f70b41e8c237 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ApplyAdagradV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ApplyAdagradV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ApplyAdagradV2" input_arg { name: "var" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ApplyAdam.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ApplyAdam.pbtxt index fdcf81e0d03a74..ae512d525f0a76 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ApplyAdam.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ApplyAdam.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ApplyAdam" input_arg { name: "var" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ApplyAddSign.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ApplyAddSign.pbtxt index 9e485fcfec65c2..f489857b9940d1 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ApplyAddSign.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ApplyAddSign.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ApplyAddSign" input_arg { name: "var" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ApplyCenteredRMSProp.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ApplyCenteredRMSProp.pbtxt index 3e50dc38c7c12f..908b913f567d1c 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ApplyCenteredRMSProp.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ApplyCenteredRMSProp.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ApplyCenteredRMSProp" input_arg { name: "var" diff --git 
a/tensorflow/core/ops/compat/ops_history_v2/ApplyFtrl.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ApplyFtrl.pbtxt index 6a67647bad7fae..2025575439504e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ApplyFtrl.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ApplyFtrl.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ApplyFtrl" input_arg { name: "var" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ApplyFtrlV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ApplyFtrlV2.pbtxt index 201d61b2fa8373..d2d1af5d40ccb6 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ApplyFtrlV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ApplyFtrlV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ApplyFtrlV2" input_arg { name: "var" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ApplyGradientDescent.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ApplyGradientDescent.pbtxt index 25fb5723ebdaf9..8451d74ea1c4ba 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ApplyGradientDescent.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ApplyGradientDescent.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ApplyGradientDescent" input_arg { name: "var" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ApplyMomentum.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ApplyMomentum.pbtxt index 289c6ea6151f89..f54384d4fbd58f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ApplyMomentum.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ApplyMomentum.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ApplyMomentum" input_arg { name: "var" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ApplyPowerSign.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ApplyPowerSign.pbtxt index fb3838c02a2e16..f2f8f79d5bad9b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ApplyPowerSign.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ApplyPowerSign.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ApplyPowerSign" input_arg { name: "var" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ApplyProximalAdagrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ApplyProximalAdagrad.pbtxt index c25959517fcd7b..632d4d5ff91e2f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ApplyProximalAdagrad.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ApplyProximalAdagrad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ApplyProximalAdagrad" input_arg { name: "var" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ApplyProximalGradientDescent.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ApplyProximalGradientDescent.pbtxt index 3482b511cffd59..6a9c4e9ae6d27c 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ApplyProximalGradientDescent.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ApplyProximalGradientDescent.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ApplyProximalGradientDescent" input_arg { name: "var" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ApplyRMSProp.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ApplyRMSProp.pbtxt index 24fe49118c943d..77a1e75f1f959f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ApplyRMSProp.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ApplyRMSProp.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ApplyRMSProp" input_arg { name: "var" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ApproxTopK.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ApproxTopK.pbtxt index a7a32594df6aa1..b97ee1259579ee 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ApproxTopK.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ApproxTopK.pbtxt 
@@ -1,4 +1,4 @@ -op { +op { name: "ApproxTopK" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ApproximateEqual.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ApproximateEqual.pbtxt index be01e038fdef3a..0b62abc45a378d 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ApproximateEqual.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ApproximateEqual.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ApproximateEqual" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ArgMax.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ArgMax.pbtxt index 90679987916abd..0cfef9a64eb544 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ArgMax.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ArgMax.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ArgMax" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ArgMin.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ArgMin.pbtxt index d2113ec185ec1d..4b83dd746cc3c5 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ArgMin.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ArgMin.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ArgMin" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/AsString.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AsString.pbtxt index b2b275470159db..7b985f6126d4a3 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/AsString.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/AsString.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "AsString" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Asin.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Asin.pbtxt index 690d2a556e4be8..652e0ea057b672 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Asin.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Asin.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Asin" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Asinh.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Asinh.pbtxt index 3d78d9de59399d..7f31ec1236ce91 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Asinh.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Asinh.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Asinh" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Assert.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Assert.pbtxt index 343b2bd0b8da08..a891ca8c601309 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Assert.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Assert.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Assert" input_arg { name: "condition" diff --git a/tensorflow/core/ops/compat/ops_history_v2/AssertCardinalityDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AssertCardinalityDataset.pbtxt index 147978a3363667..edf77a307ece00 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/AssertCardinalityDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/AssertCardinalityDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "AssertCardinalityDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/AssertNextDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AssertNextDataset.pbtxt index 0bdc9b4f9b278e..f1ebd27d543b14 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/AssertNextDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/AssertNextDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "AssertNextDataset" input_arg { name: "input_dataset" diff --git 
a/tensorflow/core/ops/compat/ops_history_v2/AssertPrevDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AssertPrevDataset.pbtxt index 23bced136aa919..62899074c77fd8 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/AssertPrevDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/AssertPrevDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "AssertPrevDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Assign.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Assign.pbtxt index 6760fe93d26221..9255e12f1a8f11 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Assign.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Assign.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Assign" input_arg { name: "ref" diff --git a/tensorflow/core/ops/compat/ops_history_v2/AssignAdd.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AssignAdd.pbtxt index eea9bd9ad00ac5..a631b1b807a5ba 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/AssignAdd.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/AssignAdd.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "AssignAdd" input_arg { name: "ref" diff --git a/tensorflow/core/ops/compat/ops_history_v2/AssignAddVariableOp.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AssignAddVariableOp.pbtxt index 91adfade5b6bc3..c3a8b74a0daaf8 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/AssignAddVariableOp.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/AssignAddVariableOp.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "AssignAddVariableOp" input_arg { name: "resource" diff --git a/tensorflow/core/ops/compat/ops_history_v2/AssignSub.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AssignSub.pbtxt index 3e13c2c8c52e17..0337c992553583 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/AssignSub.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/AssignSub.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "AssignSub" input_arg { name: "ref" diff --git a/tensorflow/core/ops/compat/ops_history_v2/AssignSubVariableOp.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AssignSubVariableOp.pbtxt index a93f5576589e29..a5c9a567d077ae 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/AssignSubVariableOp.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/AssignSubVariableOp.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "AssignSubVariableOp" input_arg { name: "resource" diff --git a/tensorflow/core/ops/compat/ops_history_v2/AssignVariableOp.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AssignVariableOp.pbtxt index fa84589b303260..e7c5290c53ecdb 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/AssignVariableOp.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/AssignVariableOp.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "AssignVariableOp" input_arg { name: "resource" diff --git a/tensorflow/core/ops/compat/ops_history_v2/AssignVariableXlaConcatND.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AssignVariableXlaConcatND.pbtxt index 8a17f4cadc3417..7c9d5df2c9c8c8 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/AssignVariableXlaConcatND.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/AssignVariableXlaConcatND.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "AssignVariableXlaConcatND" input_arg { name: "resource" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Atan.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Atan.pbtxt index 78fd00eea4d39b..b063bf81719aed 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Atan.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Atan.pbtxt @@ -1,4 +1,4 @@ -op { +op { 
name: "Atan" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Atan2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Atan2.pbtxt index 9ae51baed83ca7..e58675db4c19d0 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Atan2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Atan2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Atan2" input_arg { name: "y" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Atanh.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Atanh.pbtxt index 76aaad73963159..28d417a08544e9 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Atanh.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Atanh.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Atanh" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/AudioSpectrogram.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AudioSpectrogram.pbtxt index 96bf31789fcec6..dbc2a2280dee3c 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/AudioSpectrogram.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/AudioSpectrogram.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "AudioSpectrogram" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/AudioSummary.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AudioSummary.pbtxt index 0a33e5ca25463e..4b1830595e07e4 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/AudioSummary.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/AudioSummary.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "AudioSummary" input_arg { name: "tag" diff --git a/tensorflow/core/ops/compat/ops_history_v2/AudioSummaryV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AudioSummaryV2.pbtxt index e92012709af98d..313c044aaeb506 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/AudioSummaryV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/AudioSummaryV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "AudioSummaryV2" input_arg { name: "tag" diff --git a/tensorflow/core/ops/compat/ops_history_v2/AutoShardDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AutoShardDataset.pbtxt index 6ab98040d710eb..465b757c8e967b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/AutoShardDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/AutoShardDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "AutoShardDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/AvgPool.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AvgPool.pbtxt index f7472845fbcbd4..8e7db139a9a3c7 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/AvgPool.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/AvgPool.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "AvgPool" input_arg { name: "value" diff --git a/tensorflow/core/ops/compat/ops_history_v2/AvgPool3D.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AvgPool3D.pbtxt index 619b1b4c4eaa70..f3f60cbc1f18d2 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/AvgPool3D.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/AvgPool3D.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "AvgPool3D" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/AvgPool3DGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AvgPool3DGrad.pbtxt index 3c74eac2382c39..67fef9572878db 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/AvgPool3DGrad.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/AvgPool3DGrad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "AvgPool3DGrad" input_arg { name: "orig_input_shape" diff --git 
a/tensorflow/core/ops/compat/ops_history_v2/AvgPoolGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/AvgPoolGrad.pbtxt index f3952fab895c1f..6c72effaffaa43 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/AvgPoolGrad.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/AvgPoolGrad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "AvgPoolGrad" input_arg { name: "orig_input_shape" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BandedTriangularSolve.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BandedTriangularSolve.pbtxt index c69a9650010a38..5cf85a62392a2f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BandedTriangularSolve.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BandedTriangularSolve.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BandedTriangularSolve" input_arg { name: "matrix" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Barrier.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Barrier.pbtxt index bdc8f8f53ba666..9391157b888851 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Barrier.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Barrier.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Barrier" output_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BarrierClose.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BarrierClose.pbtxt index 4d121c65796ef0..69230484813264 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BarrierClose.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BarrierClose.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BarrierClose" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BarrierIncompleteSize.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BarrierIncompleteSize.pbtxt index cb040bc1db3c0f..0d17c183684932 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BarrierIncompleteSize.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BarrierIncompleteSize.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BarrierIncompleteSize" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BarrierInsertMany.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BarrierInsertMany.pbtxt index db48fd4b4554d9..86b64f603ebb0a 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BarrierInsertMany.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BarrierInsertMany.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BarrierInsertMany" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BarrierReadySize.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BarrierReadySize.pbtxt index 8d214c6e6fedf0..e7b063005593aa 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BarrierReadySize.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BarrierReadySize.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BarrierReadySize" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BarrierTakeMany.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BarrierTakeMany.pbtxt index 71e75d20ca7a08..e324042930451c 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BarrierTakeMany.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BarrierTakeMany.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BarrierTakeMany" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Batch.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Batch.pbtxt index 24fc21b2f288db..d3ee8d8513d434 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Batch.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Batch.pbtxt @@ -1,4 +1,4 @@ -op 
{ +op { name: "Batch" input_arg { name: "in_tensors" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BatchCholesky.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BatchCholesky.pbtxt index a8d4223751ce30..5d38acc7c2d563 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BatchCholesky.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BatchCholesky.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BatchCholesky" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BatchCholeskyGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BatchCholeskyGrad.pbtxt index 1beef0ebc8dda0..286ae3a81169d6 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BatchCholeskyGrad.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BatchCholeskyGrad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BatchCholeskyGrad" input_arg { name: "l" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BatchDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BatchDataset.pbtxt index da9db473d53bbb..39467ae1bb6d33 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BatchDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BatchDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BatchDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BatchDatasetV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BatchDatasetV2.pbtxt index 4e95dbe7edd433..a3dc3afed0f53a 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BatchDatasetV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BatchDatasetV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BatchDatasetV2" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BatchFFT.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BatchFFT.pbtxt index 872afa3d2c76d1..4fe86a392f079b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BatchFFT.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BatchFFT.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BatchFFT" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BatchFFT2D.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BatchFFT2D.pbtxt index 1bd0127c2a50ee..b52a6bdca44512 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BatchFFT2D.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BatchFFT2D.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BatchFFT2D" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BatchFFT3D.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BatchFFT3D.pbtxt index c1d39ed70f7671..7f19cf13c10573 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BatchFFT3D.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BatchFFT3D.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BatchFFT3D" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BatchFunction.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BatchFunction.pbtxt index e35c2c8618eddd..cf5e5896d084ba 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BatchFunction.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BatchFunction.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BatchFunction" input_arg { name: "in_tensors" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BatchIFFT.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BatchIFFT.pbtxt index 256417563cbc66..09d7b4ad7863a0 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BatchIFFT.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BatchIFFT.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BatchIFFT" input_arg 
{ name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BatchIFFT2D.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BatchIFFT2D.pbtxt index 1c21807dededf1..23cc9cc51df5fc 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BatchIFFT2D.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BatchIFFT2D.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BatchIFFT2D" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BatchIFFT3D.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BatchIFFT3D.pbtxt index 3d6ddd79d52095..10a78fab914335 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BatchIFFT3D.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BatchIFFT3D.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BatchIFFT3D" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BatchMatMul.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BatchMatMul.pbtxt index 9d7ac3ca8e2a33..8bd778087f1467 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BatchMatMul.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BatchMatMul.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BatchMatMul" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BatchMatMulV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BatchMatMulV2.pbtxt index 4769d8220f53e1..ed724ddc49db94 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BatchMatMulV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BatchMatMulV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BatchMatMulV2" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BatchMatMulV3.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BatchMatMulV3.pbtxt index 1bcfdb937064ca..052a39e77c85ca 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BatchMatMulV3.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BatchMatMulV3.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BatchMatMulV3" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BatchMatrixBandPart.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BatchMatrixBandPart.pbtxt index 167fe7b69d484e..413681e612999d 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BatchMatrixBandPart.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BatchMatrixBandPart.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BatchMatrixBandPart" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BatchMatrixDeterminant.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BatchMatrixDeterminant.pbtxt index 02e8c5dc93ec83..4bc6081aa4482a 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BatchMatrixDeterminant.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BatchMatrixDeterminant.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BatchMatrixDeterminant" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BatchMatrixDiag.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BatchMatrixDiag.pbtxt index 4b06bc66e2a6dd..6104bef9340001 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BatchMatrixDiag.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BatchMatrixDiag.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BatchMatrixDiag" input_arg { name: "diagonal" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BatchMatrixDiagPart.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BatchMatrixDiagPart.pbtxt index 611f21ce1a1b67..9bd200f8cf5384 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BatchMatrixDiagPart.pbtxt +++ 
b/tensorflow/core/ops/compat/ops_history_v2/BatchMatrixDiagPart.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BatchMatrixDiagPart" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BatchMatrixInverse.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BatchMatrixInverse.pbtxt index 7e413ef3b05da5..03a694d973a1b5 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BatchMatrixInverse.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BatchMatrixInverse.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BatchMatrixInverse" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BatchMatrixSetDiag.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BatchMatrixSetDiag.pbtxt index 377fe7cea09d92..f459184a0a3a43 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BatchMatrixSetDiag.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BatchMatrixSetDiag.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BatchMatrixSetDiag" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BatchMatrixSolve.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BatchMatrixSolve.pbtxt index 020873dea15c47..909502e91ab546 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BatchMatrixSolve.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BatchMatrixSolve.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BatchMatrixSolve" input_arg { name: "matrix" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BatchMatrixSolveLs.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BatchMatrixSolveLs.pbtxt index f6bc34b54f7b77..8c9d24efc7a404 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BatchMatrixSolveLs.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BatchMatrixSolveLs.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BatchMatrixSolveLs" input_arg { name: "matrix" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BatchMatrixTriangularSolve.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BatchMatrixTriangularSolve.pbtxt index f259cceac49a17..406fa62171f511 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BatchMatrixTriangularSolve.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BatchMatrixTriangularSolve.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BatchMatrixTriangularSolve" input_arg { name: "matrix" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BatchNormWithGlobalNormalization.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BatchNormWithGlobalNormalization.pbtxt index 846475ea1bfbbf..b9959a7d2dba82 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BatchNormWithGlobalNormalization.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BatchNormWithGlobalNormalization.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BatchNormWithGlobalNormalization" input_arg { name: "t" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BatchNormWithGlobalNormalizationGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BatchNormWithGlobalNormalizationGrad.pbtxt index c8b1b878780d0c..170d512aec6406 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BatchNormWithGlobalNormalizationGrad.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BatchNormWithGlobalNormalizationGrad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BatchNormWithGlobalNormalizationGrad" input_arg { name: "t" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BatchSelfAdjointEig.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BatchSelfAdjointEig.pbtxt index e124892ed2d677..42ba04199f7c81 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BatchSelfAdjointEig.pbtxt +++ 
b/tensorflow/core/ops/compat/ops_history_v2/BatchSelfAdjointEig.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BatchSelfAdjointEig" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BatchSelfAdjointEigV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BatchSelfAdjointEigV2.pbtxt index bc9128925886cb..df3996ea2378c1 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BatchSelfAdjointEigV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BatchSelfAdjointEigV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BatchSelfAdjointEigV2" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BatchSvd.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BatchSvd.pbtxt index 7080ed06437879..0595ffcd2a6eb6 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BatchSvd.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BatchSvd.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BatchSvd" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BatchToSpace.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BatchToSpace.pbtxt index a96fdc58dfcb32..ac089e5ca76cf0 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BatchToSpace.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BatchToSpace.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BatchToSpace" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BatchToSpaceND.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BatchToSpaceND.pbtxt index 18560c832264a4..464beb31614de7 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BatchToSpaceND.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BatchToSpaceND.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BatchToSpaceND" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BesselI0.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BesselI0.pbtxt index b45aa841a7be67..78d524c916c861 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BesselI0.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BesselI0.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BesselI0" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BesselI0e.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BesselI0e.pbtxt index 8696576a64aa23..299cf82535aa06 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BesselI0e.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BesselI0e.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BesselI0e" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BesselI1.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BesselI1.pbtxt index 600fdb5771c3c1..e756c4655ddfdf 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BesselI1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BesselI1.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BesselI1" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BesselI1e.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BesselI1e.pbtxt index d7284658a8bbb8..a9c8d0eb0e5a1b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BesselI1e.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BesselI1e.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BesselI1e" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BesselJ0.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BesselJ0.pbtxt index 73ee83477debbf..35e14e5fdf173e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BesselJ0.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BesselJ0.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: 
"BesselJ0" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BesselJ1.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BesselJ1.pbtxt index de8e56a83c2739..ef8814ea8f723a 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BesselJ1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BesselJ1.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BesselJ1" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BesselK0.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BesselK0.pbtxt index 6bd13898bf74fa..ebb364d0371e52 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BesselK0.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BesselK0.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BesselK0" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BesselK0e.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BesselK0e.pbtxt index 4b1125eb3eac38..e3e680c9549023 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BesselK0e.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BesselK0e.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BesselK0e" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BesselK1.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BesselK1.pbtxt index 04aef9f8fe0e5b..f7ca7c2f6e27b1 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BesselK1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BesselK1.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BesselK1" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BesselK1e.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BesselK1e.pbtxt index 461db7a9222ea1..96fe68d7b7f313 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BesselK1e.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BesselK1e.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BesselK1e" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BesselY0.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BesselY0.pbtxt index 6d97aba4578264..cd62af3477370a 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BesselY0.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BesselY0.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BesselY0" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BesselY1.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BesselY1.pbtxt index 6aa5123957547c..06f4c08eaf6932 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BesselY1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BesselY1.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BesselY1" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Betainc.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Betainc.pbtxt index 330e5456a7afa6..b1523bff9e9807 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Betainc.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Betainc.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Betainc" input_arg { name: "a" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BiasAdd.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BiasAdd.pbtxt index 15e7dad6982866..2eba00671d375c 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BiasAdd.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BiasAdd.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BiasAdd" input_arg { name: "value" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BiasAddGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BiasAddGrad.pbtxt index ea11e9ee5dda20..f85a2b9f6a5c01 100644 --- 
a/tensorflow/core/ops/compat/ops_history_v2/BiasAddGrad.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BiasAddGrad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BiasAddGrad" input_arg { name: "out_backprop" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BiasAddV1.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BiasAddV1.pbtxt index b1f6b0cc1fc478..b35e45165b8d63 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BiasAddV1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BiasAddV1.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BiasAddV1" input_arg { name: "value" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Bincount.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Bincount.pbtxt index 1e8d56cd7b2dc9..12135bbd54a063 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Bincount.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Bincount.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Bincount" input_arg { name: "arr" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Bitcast.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Bitcast.pbtxt index e3c5814e29872b..993a0c6da9ec65 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Bitcast.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Bitcast.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Bitcast" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BitwiseAnd.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BitwiseAnd.pbtxt index 1d8c1eb88c94e3..4b90e0e3de225a 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BitwiseAnd.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BitwiseAnd.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BitwiseAnd" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BitwiseOr.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BitwiseOr.pbtxt index 681b469fc95130..393a506f339896 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BitwiseOr.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BitwiseOr.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BitwiseOr" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BitwiseXor.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BitwiseXor.pbtxt index b8d801443cff93..c72b23fc432331 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BitwiseXor.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BitwiseXor.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BitwiseXor" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BlockLSTM.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BlockLSTM.pbtxt index 2c4ad7866c77ea..63180f534f17b0 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BlockLSTM.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BlockLSTM.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BlockLSTM" input_arg { name: "seq_len_max" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BlockLSTMGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BlockLSTMGrad.pbtxt index a03cda1c387f96..e7b6458bc8adb9 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BlockLSTMGrad.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BlockLSTMGrad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BlockLSTMGrad" input_arg { name: "seq_len_max" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BlockLSTMGradV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BlockLSTMGradV2.pbtxt index 11b3e4c48f42ea..ed0bd6b245645a 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BlockLSTMGradV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BlockLSTMGradV2.pbtxt @@ 
-1,4 +1,4 @@ -op { +op { name: "BlockLSTMGradV2" input_arg { name: "seq_len_max" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BlockLSTMV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BlockLSTMV2.pbtxt index 35df5226c632c4..5fce517277de83 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BlockLSTMV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BlockLSTMV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BlockLSTMV2" input_arg { name: "seq_len_max" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesAggregateStats.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesAggregateStats.pbtxt index 45fe3a867da0b5..72994094399599 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesAggregateStats.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesAggregateStats.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BoostedTreesAggregateStats" input_arg { name: "node_ids" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesBucketize.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesBucketize.pbtxt index 61a170763f1dd8..5f277d3e0db3ae 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesBucketize.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesBucketize.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BoostedTreesBucketize" input_arg { name: "float_values" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesCalculateBestFeatureSplit.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesCalculateBestFeatureSplit.pbtxt index f885b3317006fe..50f35695e006fb 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesCalculateBestFeatureSplit.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesCalculateBestFeatureSplit.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BoostedTreesCalculateBestFeatureSplit" input_arg { name: "node_id_range" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesCalculateBestFeatureSplitV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesCalculateBestFeatureSplitV2.pbtxt index 38ddb60c44ff2c..e900ed9c6748ca 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesCalculateBestFeatureSplitV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesCalculateBestFeatureSplitV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BoostedTreesCalculateBestFeatureSplitV2" input_arg { name: "node_id_range" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesCalculateBestGainsPerFeature.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesCalculateBestGainsPerFeature.pbtxt index e5f863eb4ac8ef..f100db7b38671c 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesCalculateBestGainsPerFeature.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesCalculateBestGainsPerFeature.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BoostedTreesCalculateBestGainsPerFeature" input_arg { name: "node_id_range" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesCenterBias.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesCenterBias.pbtxt index 78eba4fcb4ba17..5c2fb9b5c54747 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesCenterBias.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesCenterBias.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BoostedTreesCenterBias" input_arg { name: "tree_ensemble_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesCreateEnsemble.pbtxt 
b/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesCreateEnsemble.pbtxt index 8816746475cf45..cea6d23f91ff7b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesCreateEnsemble.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesCreateEnsemble.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BoostedTreesCreateEnsemble" input_arg { name: "tree_ensemble_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesCreateQuantileStreamResource.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesCreateQuantileStreamResource.pbtxt index d14e8e40b3a966..3d0d64adcd58fc 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesCreateQuantileStreamResource.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesCreateQuantileStreamResource.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BoostedTreesCreateQuantileStreamResource" input_arg { name: "quantile_stream_resource_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesDeserializeEnsemble.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesDeserializeEnsemble.pbtxt index 18aeb311841f89..b6d55ea0544c06 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesDeserializeEnsemble.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesDeserializeEnsemble.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BoostedTreesDeserializeEnsemble" input_arg { name: "tree_ensemble_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesEnsembleResourceHandleOp.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesEnsembleResourceHandleOp.pbtxt index 6c79d9a75101cf..00573c1b95a7ec 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesEnsembleResourceHandleOp.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesEnsembleResourceHandleOp.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BoostedTreesEnsembleResourceHandleOp" output_arg { name: "resource" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesExampleDebugOutputs.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesExampleDebugOutputs.pbtxt index 8e8f1aa7f332c7..066be042842aef 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesExampleDebugOutputs.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesExampleDebugOutputs.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BoostedTreesExampleDebugOutputs" input_arg { name: "tree_ensemble_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesFlushQuantileSummaries.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesFlushQuantileSummaries.pbtxt index d2b84d1e01c5ae..ae35e1023d4fae 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesFlushQuantileSummaries.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesFlushQuantileSummaries.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BoostedTreesFlushQuantileSummaries" input_arg { name: "quantile_stream_resource_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesGetEnsembleStates.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesGetEnsembleStates.pbtxt index a6bc2b2749409e..1959384a36b4a5 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesGetEnsembleStates.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesGetEnsembleStates.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "BoostedTreesGetEnsembleStates" input_arg { name: "tree_ensemble_handle" diff --git 
a/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesMakeQuantileSummaries.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesMakeQuantileSummaries.pbtxt
index 14eacf93d81cdd..bbefa8b8711b1a 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesMakeQuantileSummaries.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/BoostedTreesMakeQuantileSummaries.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "BoostedTreesMakeQuantileSummaries"
   input_arg {
     name: "float_values"

[... ~170 more tensorflow/core/ops/compat/ops_history_v2/*.pbtxt files, alphabetical from BoostedTreesMakeStatsSummary.pbtxt through DenseCountSparseOutput.pbtxt, each with the identical one-line `@@ -1,4 +1,4 @@` hunk on its leading "op {" line ...]

diff --git a/tensorflow/core/ops/compat/ops_history_v2/DenseToCSRSparseMatrix.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/DenseToCSRSparseMatrix.pbtxt
index 159fe06324c252..c8b2f66380791a 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/DenseToCSRSparseMatrix.pbtxt
+++ 
b/tensorflow/core/ops/compat/ops_history_v2/DenseToCSRSparseMatrix.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "DenseToCSRSparseMatrix" input_arg { name: "dense_input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/DenseToDenseSetOperation.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/DenseToDenseSetOperation.pbtxt index 74a3d2aaa02b5b..5188a82414ef18 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/DenseToDenseSetOperation.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/DenseToDenseSetOperation.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "DenseToDenseSetOperation" input_arg { name: "set1" diff --git a/tensorflow/core/ops/compat/ops_history_v2/DenseToSparseBatchDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/DenseToSparseBatchDataset.pbtxt index 16838ea7d82424..cb972cca8aa42f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/DenseToSparseBatchDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/DenseToSparseBatchDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "DenseToSparseBatchDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/DenseToSparseSetOperation.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/DenseToSparseSetOperation.pbtxt index e79d8f5e213011..71c9c37798f7b1 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/DenseToSparseSetOperation.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/DenseToSparseSetOperation.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "DenseToSparseSetOperation" input_arg { name: "set1" diff --git a/tensorflow/core/ops/compat/ops_history_v2/DepthToSpace.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/DepthToSpace.pbtxt index f5dd95b60c97d0..422fe7ff53f2a4 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/DepthToSpace.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/DepthToSpace.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "DepthToSpace" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/DepthwiseConv2dNative.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/DepthwiseConv2dNative.pbtxt index 90eae83c303cc0..805bf2d379aa03 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/DepthwiseConv2dNative.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/DepthwiseConv2dNative.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "DepthwiseConv2dNative" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/DepthwiseConv2dNativeBackpropFilter.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/DepthwiseConv2dNativeBackpropFilter.pbtxt index 199b883851c5ab..119933bf0ae7de 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/DepthwiseConv2dNativeBackpropFilter.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/DepthwiseConv2dNativeBackpropFilter.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "DepthwiseConv2dNativeBackpropFilter" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/DepthwiseConv2dNativeBackpropInput.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/DepthwiseConv2dNativeBackpropInput.pbtxt index 99edc181b7d635..8ad511910587f6 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/DepthwiseConv2dNativeBackpropInput.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/DepthwiseConv2dNativeBackpropInput.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "DepthwiseConv2dNativeBackpropInput" input_arg { name: "input_sizes" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Dequantize.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Dequantize.pbtxt index 8db5d571979aea..1a39e1869bfb90 100644 --- 
a/tensorflow/core/ops/compat/ops_history_v2/Dequantize.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Dequantize.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Dequantize" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/DeserializeIterator.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/DeserializeIterator.pbtxt index 2d531bd93df362..1ae290e93c0f37 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/DeserializeIterator.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/DeserializeIterator.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "DeserializeIterator" input_arg { name: "resource_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/DeserializeManySparse.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/DeserializeManySparse.pbtxt index 0ebb833a4b6383..f0e75d96d94d7e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/DeserializeManySparse.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/DeserializeManySparse.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "DeserializeManySparse" input_arg { name: "serialized_sparse" diff --git a/tensorflow/core/ops/compat/ops_history_v2/DeserializeSparse.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/DeserializeSparse.pbtxt index c327f01280ec25..c23a9b58a62ed6 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/DeserializeSparse.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/DeserializeSparse.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "DeserializeSparse" input_arg { name: "serialized_sparse" diff --git a/tensorflow/core/ops/compat/ops_history_v2/DestroyResourceOp.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/DestroyResourceOp.pbtxt index 355227a484d32b..aa16c5ad5235a5 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/DestroyResourceOp.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/DestroyResourceOp.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "DestroyResourceOp" input_arg { name: "resource" diff --git a/tensorflow/core/ops/compat/ops_history_v2/DestroyTemporaryVariable.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/DestroyTemporaryVariable.pbtxt index 773e399783efe7..7e073b2f20b040 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/DestroyTemporaryVariable.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/DestroyTemporaryVariable.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "DestroyTemporaryVariable" input_arg { name: "ref" diff --git a/tensorflow/core/ops/compat/ops_history_v2/DeviceIndex.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/DeviceIndex.pbtxt index cfb79b60e3e44d..c513889e7b1813 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/DeviceIndex.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/DeviceIndex.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "DeviceIndex" output_arg { name: "index" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Diag.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Diag.pbtxt index bae6dbfc7d2e76..92cb2071cf9788 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Diag.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Diag.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Diag" input_arg { name: "diagonal" diff --git a/tensorflow/core/ops/compat/ops_history_v2/DiagPart.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/DiagPart.pbtxt index 00e64aa39669e2..aec8c871407ccb 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/DiagPart.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/DiagPart.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "DiagPart" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Digamma.pbtxt 
b/tensorflow/core/ops/compat/ops_history_v2/Digamma.pbtxt index d29ad8723b06de..0c294e54f216ca 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Digamma.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Digamma.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Digamma" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Dilation2D.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Dilation2D.pbtxt index 30f06dc42062a5..1db8503014ae72 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Dilation2D.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Dilation2D.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Dilation2D" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Dilation2DBackpropFilter.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Dilation2DBackpropFilter.pbtxt index 4dcc4b6f248e45..5a5a9f1dbb39cd 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Dilation2DBackpropFilter.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Dilation2DBackpropFilter.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Dilation2DBackpropFilter" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Dilation2DBackpropInput.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Dilation2DBackpropInput.pbtxt index dc2e964457a8bf..8944211d86d45a 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Dilation2DBackpropInput.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Dilation2DBackpropInput.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Dilation2DBackpropInput" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/DirectedInterleaveDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/DirectedInterleaveDataset.pbtxt index 9aaadbf0a6c014..61a9462fac3b59 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/DirectedInterleaveDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/DirectedInterleaveDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "DirectedInterleaveDataset" input_arg { name: "selector_input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/DisableCopyOnRead.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/DisableCopyOnRead.pbtxt index 8a91880bab9c5b..61d189ae526cfa 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/DisableCopyOnRead.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/DisableCopyOnRead.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "DisableCopyOnRead" input_arg { name: "resource" diff --git a/tensorflow/core/ops/compat/ops_history_v2/DistributedSave.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/DistributedSave.pbtxt index 3cb284d442a94e..221820fddb1962 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/DistributedSave.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/DistributedSave.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "DistributedSave" input_arg { name: "dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Div.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Div.pbtxt index 3ec0077535986c..fdc955f19ebe17 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Div.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Div.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Div" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/DivNoNan.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/DivNoNan.pbtxt index 40fc208137bcbd..ee9025f52f25a7 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/DivNoNan.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/DivNoNan.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "DivNoNan" 
input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/DrawBoundingBoxes.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/DrawBoundingBoxes.pbtxt index 35952f3512e107..729817314e9c39 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/DrawBoundingBoxes.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/DrawBoundingBoxes.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "DrawBoundingBoxes" input_arg { name: "images" diff --git a/tensorflow/core/ops/compat/ops_history_v2/DrawBoundingBoxesV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/DrawBoundingBoxesV2.pbtxt index 85ab33cee4f34c..0a561796ca0b61 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/DrawBoundingBoxesV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/DrawBoundingBoxesV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "DrawBoundingBoxesV2" input_arg { name: "images" diff --git a/tensorflow/core/ops/compat/ops_history_v2/DummyIterationCounter.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/DummyIterationCounter.pbtxt index 2f6bf602b173d3..b1df20cae731ca 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/DummyIterationCounter.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/DummyIterationCounter.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "DummyIterationCounter" output_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/DummyMemoryCache.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/DummyMemoryCache.pbtxt index bbce6cafdc3958..63901e2585ea15 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/DummyMemoryCache.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/DummyMemoryCache.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "DummyMemoryCache" output_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/DummySeedGenerator.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/DummySeedGenerator.pbtxt index f08c0e07723d46..585bc7c7528344 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/DummySeedGenerator.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/DummySeedGenerator.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "DummySeedGenerator" output_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/DynamicEnqueueTPUEmbeddingArbitraryTensorBatch.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/DynamicEnqueueTPUEmbeddingArbitraryTensorBatch.pbtxt index 5ac83512996ed4..b494ece1a77218 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/DynamicEnqueueTPUEmbeddingArbitraryTensorBatch.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/DynamicEnqueueTPUEmbeddingArbitraryTensorBatch.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "DynamicEnqueueTPUEmbeddingArbitraryTensorBatch" input_arg { name: "sample_indices_or_row_lengths" diff --git a/tensorflow/core/ops/compat/ops_history_v2/DynamicEnqueueTPUEmbeddingRaggedTensorBatch.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/DynamicEnqueueTPUEmbeddingRaggedTensorBatch.pbtxt index 506a023aa23583..46adf791fe5977 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/DynamicEnqueueTPUEmbeddingRaggedTensorBatch.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/DynamicEnqueueTPUEmbeddingRaggedTensorBatch.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "DynamicEnqueueTPUEmbeddingRaggedTensorBatch" input_arg { name: "sample_splits" diff --git a/tensorflow/core/ops/compat/ops_history_v2/DynamicPartition.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/DynamicPartition.pbtxt index c497964b636bc3..3565bd6f754540 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/DynamicPartition.pbtxt +++ 
b/tensorflow/core/ops/compat/ops_history_v2/DynamicPartition.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "DynamicPartition" input_arg { name: "data" diff --git a/tensorflow/core/ops/compat/ops_history_v2/DynamicStitch.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/DynamicStitch.pbtxt index 76226f65312d0a..aba8346995ccd3 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/DynamicStitch.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/DynamicStitch.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "DynamicStitch" input_arg { name: "indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/EagerPyFunc.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/EagerPyFunc.pbtxt index 302e420778b6f0..56c12e3845c6ff 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/EagerPyFunc.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/EagerPyFunc.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "EagerPyFunc" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/EditDistance.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/EditDistance.pbtxt index 78fbc3f1ac4d9e..aba098b7020cdb 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/EditDistance.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/EditDistance.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "EditDistance" input_arg { name: "hypothesis_indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Eig.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Eig.pbtxt index 3872e50f11ed76..d95892a88367df 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Eig.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Eig.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Eig" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Einsum.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Einsum.pbtxt index 8a365bb22bf4d9..3855daa079bd40 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Einsum.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Einsum.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Einsum" input_arg { name: "inputs" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Elu.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Elu.pbtxt index 3c9ea1633638b9..4b8a81527569a9 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Elu.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Elu.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Elu" input_arg { name: "features" diff --git a/tensorflow/core/ops/compat/ops_history_v2/EluGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/EluGrad.pbtxt index 2b2e8b974c18f7..cfbc9f99e314f3 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/EluGrad.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/EluGrad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "EluGrad" input_arg { name: "gradients" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Empty.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Empty.pbtxt index 760cab96732ccc..147854bb88cf23 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Empty.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Empty.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Empty" input_arg { name: "shape" diff --git a/tensorflow/core/ops/compat/ops_history_v2/EmptyTensorList.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/EmptyTensorList.pbtxt index 46015e651e219f..d15fa1ad47048c 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/EmptyTensorList.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/EmptyTensorList.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "EmptyTensorList" input_arg { name: "element_shape" diff --git 
a/tensorflow/core/ops/compat/ops_history_v2/EmptyTensorMap.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/EmptyTensorMap.pbtxt index ee1fc166627b41..25327b4e1e8bfb 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/EmptyTensorMap.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/EmptyTensorMap.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "EmptyTensorMap" output_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/EncodeBase64.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/EncodeBase64.pbtxt index 001c3d0d9fb36d..6e5241d0fd7033 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/EncodeBase64.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/EncodeBase64.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "EncodeBase64" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/EncodeJpeg.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/EncodeJpeg.pbtxt index 165a02aed32de1..9f3c3453e33e32 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/EncodeJpeg.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/EncodeJpeg.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "EncodeJpeg" input_arg { name: "image" diff --git a/tensorflow/core/ops/compat/ops_history_v2/EncodeJpegVariableQuality.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/EncodeJpegVariableQuality.pbtxt index 97f93d28b288f0..94c41ea4e5d574 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/EncodeJpegVariableQuality.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/EncodeJpegVariableQuality.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "EncodeJpegVariableQuality" input_arg { name: "images" diff --git a/tensorflow/core/ops/compat/ops_history_v2/EncodePng.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/EncodePng.pbtxt index 7b7d161e8cba6e..7d2cbd85225125 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/EncodePng.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/EncodePng.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "EncodePng" input_arg { name: "image" diff --git a/tensorflow/core/ops/compat/ops_history_v2/EncodeProto.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/EncodeProto.pbtxt index 2cea90e4183ddf..e619618946a6c0 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/EncodeProto.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/EncodeProto.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "EncodeProto" input_arg { name: "sizes" diff --git a/tensorflow/core/ops/compat/ops_history_v2/EncodeWav.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/EncodeWav.pbtxt index e468d229ff4024..b013362a47d9e4 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/EncodeWav.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/EncodeWav.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "EncodeWav" input_arg { name: "audio" diff --git a/tensorflow/core/ops/compat/ops_history_v2/EnqueueTPUEmbeddingArbitraryTensorBatch.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/EnqueueTPUEmbeddingArbitraryTensorBatch.pbtxt index f8d3f09b3cd32a..efb854a44baf5e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/EnqueueTPUEmbeddingArbitraryTensorBatch.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/EnqueueTPUEmbeddingArbitraryTensorBatch.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "EnqueueTPUEmbeddingArbitraryTensorBatch" input_arg { name: "sample_indices_or_row_lengths" diff --git a/tensorflow/core/ops/compat/ops_history_v2/EnqueueTPUEmbeddingBatch.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/EnqueueTPUEmbeddingBatch.pbtxt index b1be42b17eed44..a09c5d87f44fda 100644 --- 
a/tensorflow/core/ops/compat/ops_history_v2/EnqueueTPUEmbeddingBatch.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/EnqueueTPUEmbeddingBatch.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "EnqueueTPUEmbeddingBatch" input_arg { name: "batch" diff --git a/tensorflow/core/ops/compat/ops_history_v2/EnqueueTPUEmbeddingIntegerBatch.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/EnqueueTPUEmbeddingIntegerBatch.pbtxt index 37b123d9791a2a..26d63b6e49dc5e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/EnqueueTPUEmbeddingIntegerBatch.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/EnqueueTPUEmbeddingIntegerBatch.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "EnqueueTPUEmbeddingIntegerBatch" input_arg { name: "batch" diff --git a/tensorflow/core/ops/compat/ops_history_v2/EnqueueTPUEmbeddingRaggedTensorBatch.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/EnqueueTPUEmbeddingRaggedTensorBatch.pbtxt index 5e1a15cbc95fcc..327bd4fb387700 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/EnqueueTPUEmbeddingRaggedTensorBatch.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/EnqueueTPUEmbeddingRaggedTensorBatch.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "EnqueueTPUEmbeddingRaggedTensorBatch" input_arg { name: "sample_splits" diff --git a/tensorflow/core/ops/compat/ops_history_v2/EnqueueTPUEmbeddingSparseBatch.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/EnqueueTPUEmbeddingSparseBatch.pbtxt index 03a19c2feed572..64b8cb5178c5a6 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/EnqueueTPUEmbeddingSparseBatch.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/EnqueueTPUEmbeddingSparseBatch.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "EnqueueTPUEmbeddingSparseBatch" input_arg { name: "sample_indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/EnqueueTPUEmbeddingSparseTensorBatch.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/EnqueueTPUEmbeddingSparseTensorBatch.pbtxt index 19521bb9213393..ab1c9d264eca1e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/EnqueueTPUEmbeddingSparseTensorBatch.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/EnqueueTPUEmbeddingSparseTensorBatch.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "EnqueueTPUEmbeddingSparseTensorBatch" input_arg { name: "sample_indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/EnsureShape.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/EnsureShape.pbtxt index 336e2aed15cff6..24fa5589131df3 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/EnsureShape.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/EnsureShape.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "EnsureShape" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Enter.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Enter.pbtxt index d0a555da70c264..d39d15f34dbf6f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Enter.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Enter.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Enter" input_arg { name: "data" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Equal.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Equal.pbtxt index 8ad87805a6c558..a50cbdfcfeb1ac 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Equal.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Equal.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Equal" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Erf.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Erf.pbtxt index a53d8e380ef17c..680b736fa3e4a3 100644 --- 
a/tensorflow/core/ops/compat/ops_history_v2/Erf.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Erf.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Erf" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Erfc.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Erfc.pbtxt index 589ca5a917c652..2fcfc68f04fbf2 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Erfc.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Erfc.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Erfc" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Erfinv.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Erfinv.pbtxt index 3b51f3b83c9d3b..78443dada86bc4 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Erfinv.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Erfinv.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Erfinv" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/EuclideanNorm.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/EuclideanNorm.pbtxt index b4916ba2c7ee8c..b88f521b57199f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/EuclideanNorm.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/EuclideanNorm.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "EuclideanNorm" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExecuteTPUEmbeddingPartitioner.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExecuteTPUEmbeddingPartitioner.pbtxt index c2a3d248d1f844..37025570305f98 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExecuteTPUEmbeddingPartitioner.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExecuteTPUEmbeddingPartitioner.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExecuteTPUEmbeddingPartitioner" output_arg { name: "common_config" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Exit.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Exit.pbtxt index a722f2b7d5ced9..56a1371bec6b3f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Exit.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Exit.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Exit" input_arg { name: "data" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Exp.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Exp.pbtxt index a237e834372291..7afeb677a29a54 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Exp.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Exp.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Exp" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExpandDims.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExpandDims.pbtxt index 9bd506742ce3d7..c7bb353162c8c8 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExpandDims.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExpandDims.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExpandDims" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalAssertNextDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalAssertNextDataset.pbtxt index d33df8039e8b66..937afb4e0ae48f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalAssertNextDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalAssertNextDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExperimentalAssertNextDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalAutoShardDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalAutoShardDataset.pbtxt index 7a609f0300a365..d8d8d7ffc96e4e 100644 --- 
a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalAutoShardDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalAutoShardDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExperimentalAutoShardDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalBytesProducedStatsDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalBytesProducedStatsDataset.pbtxt index 509dd811947653..10555bb3ebfbf5 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalBytesProducedStatsDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalBytesProducedStatsDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExperimentalBytesProducedStatsDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalCSVDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalCSVDataset.pbtxt index 85d8950a845328..a618d55fcb6289 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalCSVDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalCSVDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExperimentalCSVDataset" input_arg { name: "filenames" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalChooseFastestDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalChooseFastestDataset.pbtxt index 7d52752fffe21d..2c04d58db4c0a4 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalChooseFastestDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalChooseFastestDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExperimentalChooseFastestDataset" input_arg { name: "input_datasets" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalDatasetCardinality.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalDatasetCardinality.pbtxt index d0e7b0934d0dd6..f6ba3657864e1a 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalDatasetCardinality.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalDatasetCardinality.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExperimentalDatasetCardinality" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalDatasetToTFRecord.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalDatasetToTFRecord.pbtxt index 76b1f8c1d0036f..0d0e46c8b39ade 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalDatasetToTFRecord.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalDatasetToTFRecord.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExperimentalDatasetToTFRecord" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalDenseToSparseBatchDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalDenseToSparseBatchDataset.pbtxt index 94afefd024b797..c322ef95777609 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalDenseToSparseBatchDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalDenseToSparseBatchDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExperimentalDenseToSparseBatchDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalDirectedInterleaveDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalDirectedInterleaveDataset.pbtxt index c2214754670c61..2a877497ff29fd 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalDirectedInterleaveDataset.pbtxt +++ 
b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalDirectedInterleaveDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExperimentalDirectedInterleaveDataset" input_arg { name: "selector_input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalGroupByReducerDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalGroupByReducerDataset.pbtxt index 94d88c86c5fd0c..5e2fd15a22c908 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalGroupByReducerDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalGroupByReducerDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExperimentalGroupByReducerDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalGroupByWindowDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalGroupByWindowDataset.pbtxt index 25ede182ce533a..35f9c3c1a9547d 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalGroupByWindowDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalGroupByWindowDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExperimentalGroupByWindowDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalIgnoreErrorsDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalIgnoreErrorsDataset.pbtxt index 5180b62d1e57b5..8fc4e7d2c8adee 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalIgnoreErrorsDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalIgnoreErrorsDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExperimentalIgnoreErrorsDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalIteratorGetDevice.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalIteratorGetDevice.pbtxt index d65b5f8bb6c4a0..8e1e10240f9ea7 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalIteratorGetDevice.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalIteratorGetDevice.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExperimentalIteratorGetDevice" input_arg { name: "resource" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalLMDBDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalLMDBDataset.pbtxt index 05c9db9bd03516..e8b8694a947d95 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalLMDBDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalLMDBDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExperimentalLMDBDataset" input_arg { name: "filenames" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalLatencyStatsDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalLatencyStatsDataset.pbtxt index aa401c14384320..29ba38a500c2b7 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalLatencyStatsDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalLatencyStatsDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExperimentalLatencyStatsDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalMapAndBatchDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalMapAndBatchDataset.pbtxt index 519fa71185c471..7799116408c237 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalMapAndBatchDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalMapAndBatchDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: 
"ExperimentalMapAndBatchDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalMapDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalMapDataset.pbtxt index 75fef0d63ddb9f..401331b716073d 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalMapDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalMapDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExperimentalMapDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalMatchingFilesDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalMatchingFilesDataset.pbtxt index 29a30d9b257a07..45ef522bf7fdb4 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalMatchingFilesDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalMatchingFilesDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExperimentalMatchingFilesDataset" input_arg { name: "patterns" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalMaxIntraOpParallelismDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalMaxIntraOpParallelismDataset.pbtxt index 004b1dd4f9067d..109f3906b31852 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalMaxIntraOpParallelismDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalMaxIntraOpParallelismDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExperimentalMaxIntraOpParallelismDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalNonSerializableDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalNonSerializableDataset.pbtxt index 72b5a2ef5b2d32..b0c45ac19941ce 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalNonSerializableDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalNonSerializableDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExperimentalNonSerializableDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalParallelInterleaveDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalParallelInterleaveDataset.pbtxt index 5d9a463a93ab4f..a90031c9dad06a 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalParallelInterleaveDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalParallelInterleaveDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExperimentalParallelInterleaveDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalParseExampleDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalParseExampleDataset.pbtxt index 3e150e33913691..44701f67286318 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalParseExampleDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalParseExampleDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExperimentalParseExampleDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalPrivateThreadPoolDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalPrivateThreadPoolDataset.pbtxt index e81bde0383e847..3098c30ba69d11 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalPrivateThreadPoolDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalPrivateThreadPoolDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExperimentalPrivateThreadPoolDataset" 
input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalRandomDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalRandomDataset.pbtxt index 04d661ee71297c..c3276a46df5b41 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalRandomDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalRandomDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExperimentalRandomDataset" input_arg { name: "seed" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalRebatchDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalRebatchDataset.pbtxt index 2656cc09900abd..2c024741713d48 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalRebatchDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalRebatchDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExperimentalRebatchDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalScanDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalScanDataset.pbtxt index cc4fc8c0e9c249..39d42061ef58d6 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalScanDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalScanDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExperimentalScanDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalSetStatsAggregatorDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalSetStatsAggregatorDataset.pbtxt index 4a6d59b28c2e80..291597bf11b8dc 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalSetStatsAggregatorDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalSetStatsAggregatorDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExperimentalSetStatsAggregatorDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalSleepDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalSleepDataset.pbtxt index 06dbf87163fdae..806d764d9cdf12 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalSleepDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalSleepDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExperimentalSleepDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalSlidingWindowDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalSlidingWindowDataset.pbtxt index 6d81c4c97f833b..ab18f4e214c578 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalSlidingWindowDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalSlidingWindowDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExperimentalSlidingWindowDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalSqlDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalSqlDataset.pbtxt index 634a65912fa64a..f56ce488df0aba 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalSqlDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalSqlDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExperimentalSqlDataset" input_arg { name: "driver_name" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalStatsAggregatorHandle.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalStatsAggregatorHandle.pbtxt index a3493ec933b7c0..b00cadbca09498 
100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalStatsAggregatorHandle.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalStatsAggregatorHandle.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExperimentalStatsAggregatorHandle" output_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalStatsAggregatorSummary.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalStatsAggregatorSummary.pbtxt index 87f63b282fe5f9..7886f7a6cb305e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalStatsAggregatorSummary.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalStatsAggregatorSummary.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExperimentalStatsAggregatorSummary" input_arg { name: "iterator" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalTakeWhileDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalTakeWhileDataset.pbtxt index db64c7eb605f44..7c9b4f86adbbe4 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalTakeWhileDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalTakeWhileDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExperimentalTakeWhileDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalThreadPoolDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalThreadPoolDataset.pbtxt index dc50cd1b975c71..da23c415fd24f0 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalThreadPoolDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalThreadPoolDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExperimentalThreadPoolDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalThreadPoolHandle.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalThreadPoolHandle.pbtxt index 47dc873ee476fb..8b230f90470f29 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalThreadPoolHandle.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalThreadPoolHandle.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExperimentalThreadPoolHandle" output_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalUnbatchDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalUnbatchDataset.pbtxt index 42819a367ee74e..83f3a39f5e9244 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalUnbatchDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalUnbatchDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExperimentalUnbatchDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalUniqueDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalUniqueDataset.pbtxt index 4aa39a71b2d8bc..95668c930d7269 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExperimentalUniqueDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExperimentalUniqueDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExperimentalUniqueDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Expint.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Expint.pbtxt index afb722b09ac2c3..3080bf15de9170 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Expint.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Expint.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Expint" input_arg { name: "x" diff --git 
a/tensorflow/core/ops/compat/ops_history_v2/Expm1.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Expm1.pbtxt index 6ea265efc65ffe..b09aac454d000e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Expm1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Expm1.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Expm1" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExtractGlimpse.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExtractGlimpse.pbtxt index 1b305eb38867d2..597a77a3f3b8fa 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExtractGlimpse.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExtractGlimpse.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExtractGlimpse" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExtractGlimpseV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExtractGlimpseV2.pbtxt index 8dababaccb9423..08725f4504ce01 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExtractGlimpseV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExtractGlimpseV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExtractGlimpseV2" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExtractImagePatches.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExtractImagePatches.pbtxt index ada7acbbf57863..dee8034d6c7076 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExtractImagePatches.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExtractImagePatches.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExtractImagePatches" input_arg { name: "images" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExtractJpegShape.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExtractJpegShape.pbtxt index ef8068e1e91f00..ac3d34ca234fea 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExtractJpegShape.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExtractJpegShape.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExtractJpegShape" input_arg { name: "contents" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ExtractVolumePatches.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ExtractVolumePatches.pbtxt index 96d79b25ea52eb..09cc21a38b47e6 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ExtractVolumePatches.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ExtractVolumePatches.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ExtractVolumePatches" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/FFT.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FFT.pbtxt index 1bc74e426a247f..e986f323936f51 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/FFT.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/FFT.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "FFT" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/FFT2D.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FFT2D.pbtxt index 38c837f8e91a87..adb1c253867f22 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/FFT2D.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/FFT2D.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "FFT2D" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/FFT3D.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FFT3D.pbtxt index df20d1970b2bd3..9266d6db4a688f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/FFT3D.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/FFT3D.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "FFT3D" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/FFTND.pbtxt 
b/tensorflow/core/ops/compat/ops_history_v2/FFTND.pbtxt
index 1cb40fa334b2f2..20afcc3c5b466d 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FFTND.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FFTND.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FFTND"
   input_arg {
     name: "input"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/FIFOQueue.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FIFOQueue.pbtxt
index d3d57195e72efd..c3321a8c6e4782 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FIFOQueue.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FIFOQueue.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FIFOQueue"
   output_arg {
     name: "handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/FIFOQueueV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FIFOQueueV2.pbtxt
index 6f9c48db9ebcb7..9b1c8404d0ddac 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FIFOQueueV2.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FIFOQueueV2.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FIFOQueueV2"
   output_arg {
     name: "handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/Fact.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Fact.pbtxt
index 426124e73678e1..90a0ad8dd00112 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/Fact.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/Fact.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "Fact"
   output_arg {
     name: "fact"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/FakeParam.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FakeParam.pbtxt
index 7b286c4697e1d3..dc2a7c5ea46991 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FakeParam.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FakeParam.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FakeParam"
   output_arg {
     name: "output"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/FakeQuantWithMinMaxArgs.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FakeQuantWithMinMaxArgs.pbtxt
index 5faa52d9136ca7..2d8eac83c59be4 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FakeQuantWithMinMaxArgs.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FakeQuantWithMinMaxArgs.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FakeQuantWithMinMaxArgs"
   input_arg {
     name: "inputs"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/FakeQuantWithMinMaxArgsGradient.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FakeQuantWithMinMaxArgsGradient.pbtxt
index 02850fbbc9b4d1..5d02f59da1e444 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FakeQuantWithMinMaxArgsGradient.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FakeQuantWithMinMaxArgsGradient.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FakeQuantWithMinMaxArgsGradient"
   input_arg {
     name: "gradients"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/FakeQuantWithMinMaxVars.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FakeQuantWithMinMaxVars.pbtxt
index 8fc979749d448a..233f5cc2f66134 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FakeQuantWithMinMaxVars.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FakeQuantWithMinMaxVars.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FakeQuantWithMinMaxVars"
   input_arg {
     name: "inputs"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/FakeQuantWithMinMaxVarsGradient.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FakeQuantWithMinMaxVarsGradient.pbtxt
index 595d2688c7b99d..cf8ed6f8b7e18d 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FakeQuantWithMinMaxVarsGradient.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FakeQuantWithMinMaxVarsGradient.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FakeQuantWithMinMaxVarsGradient"
   input_arg {
     name: "gradients"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/FakeQuantWithMinMaxVarsPerChannel.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FakeQuantWithMinMaxVarsPerChannel.pbtxt
index 7300a1daf33c8a..551ae79cd94ce9 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FakeQuantWithMinMaxVarsPerChannel.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FakeQuantWithMinMaxVarsPerChannel.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FakeQuantWithMinMaxVarsPerChannel"
   input_arg {
     name: "inputs"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/FakeQuantWithMinMaxVarsPerChannelGradient.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FakeQuantWithMinMaxVarsPerChannelGradient.pbtxt
index 80e037e94e46f3..a787e251c60a6e 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FakeQuantWithMinMaxVarsPerChannelGradient.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FakeQuantWithMinMaxVarsPerChannelGradient.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FakeQuantWithMinMaxVarsPerChannelGradient"
   input_arg {
     name: "gradients"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/FakeQueue.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FakeQueue.pbtxt
index 1a45d2509e62a4..5e4cb62d941eb9 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FakeQueue.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FakeQueue.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FakeQueue"
   input_arg {
     name: "resource"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/FileSystemSetConfiguration.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FileSystemSetConfiguration.pbtxt
index 06b193366fea68..95d26fc09818f5 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FileSystemSetConfiguration.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FileSystemSetConfiguration.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FileSystemSetConfiguration"
   input_arg {
     name: "scheme"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/Fill.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Fill.pbtxt
index e0859caefb7b41..543ae42239b4c3 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/Fill.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/Fill.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "Fill"
   input_arg {
     name: "dims"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/FilterByLastComponentDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FilterByLastComponentDataset.pbtxt
index 804e6f5dadc754..cf9bbc586524a9 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FilterByLastComponentDataset.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FilterByLastComponentDataset.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FilterByLastComponentDataset"
   input_arg {
     name: "input_dataset"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/FilterDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FilterDataset.pbtxt
index 193fa2eb091518..aad48d7aed4f62 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FilterDataset.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FilterDataset.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FilterDataset"
   input_arg {
     name: "input_dataset"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/FinalizeDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FinalizeDataset.pbtxt
index cc8c78b502d528..38e49288d662e1 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FinalizeDataset.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FinalizeDataset.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FinalizeDataset"
   input_arg {
     name: "input_dataset"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/FinalizeTPUEmbedding.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FinalizeTPUEmbedding.pbtxt
index 00bc279a92250a..bd367fcdd12451 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FinalizeTPUEmbedding.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FinalizeTPUEmbedding.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FinalizeTPUEmbedding"
   input_arg {
     name: "common_config"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/FinalizeTPUEmbeddingV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FinalizeTPUEmbeddingV2.pbtxt
index fd57e3726f6040..63c69eaff3aaba 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FinalizeTPUEmbeddingV2.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FinalizeTPUEmbeddingV2.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FinalizeTPUEmbeddingV2"
   input_arg {
     name: "common_config"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/Fingerprint.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Fingerprint.pbtxt
index b00a23b0f22f8c..3a5585701ba722 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/Fingerprint.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/Fingerprint.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "Fingerprint"
   input_arg {
     name: "data"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/FixedLengthRecordDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FixedLengthRecordDataset.pbtxt
index 04aff9c6ec69fa..63522d5f47c331 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FixedLengthRecordDataset.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FixedLengthRecordDataset.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FixedLengthRecordDataset"
   input_arg {
     name: "filenames"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/FixedLengthRecordDatasetV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FixedLengthRecordDatasetV2.pbtxt
index e27a084317ac5a..88d744513572c6 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FixedLengthRecordDatasetV2.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FixedLengthRecordDatasetV2.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FixedLengthRecordDatasetV2"
   input_arg {
     name: "filenames"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/FixedLengthRecordReader.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FixedLengthRecordReader.pbtxt
index 9f3aeb4fa8f374..75b6018f249c13 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FixedLengthRecordReader.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FixedLengthRecordReader.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FixedLengthRecordReader"
   output_arg {
     name: "reader_handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/FixedLengthRecordReaderV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FixedLengthRecordReaderV2.pbtxt
index 1bd086e2d92964..b16e5225240580 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FixedLengthRecordReaderV2.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FixedLengthRecordReaderV2.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FixedLengthRecordReaderV2"
   output_arg {
     name: "reader_handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/FixedUnigramCandidateSampler.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FixedUnigramCandidateSampler.pbtxt
index 264c48f683c7ab..a7911344886c97 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FixedUnigramCandidateSampler.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FixedUnigramCandidateSampler.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FixedUnigramCandidateSampler"
   input_arg {
     name: "true_classes"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/FlatMapDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FlatMapDataset.pbtxt
index f0515e584c7b92..dcf1a7ae71c41f 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FlatMapDataset.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FlatMapDataset.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FlatMapDataset"
   input_arg {
     name: "input_dataset"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/Floor.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Floor.pbtxt
index 0653fc9864ca3f..27e405e22de85e 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/Floor.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/Floor.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "Floor"
   input_arg {
     name: "x"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/FloorDiv.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FloorDiv.pbtxt
index 5b6aa1c29f34e1..dcaff127c7d9d0 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FloorDiv.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FloorDiv.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FloorDiv"
   input_arg {
     name: "x"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/FloorMod.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FloorMod.pbtxt
index 2f912df3b49de2..bbf48e9b826570 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FloorMod.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FloorMod.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FloorMod"
   input_arg {
     name: "x"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/FlushSummaryWriter.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FlushSummaryWriter.pbtxt
index c7c659b79f6d5c..f928d4abe99a55 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FlushSummaryWriter.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FlushSummaryWriter.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FlushSummaryWriter"
   input_arg {
     name: "writer"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/For.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/For.pbtxt
index d0cdc427284e7f..139990f3994b84 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/For.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/For.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "For"
   input_arg {
     name: "start"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/FractionalAvgPool.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FractionalAvgPool.pbtxt
index d503b56852ad5d..5fc527b066de60 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FractionalAvgPool.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FractionalAvgPool.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FractionalAvgPool"
   input_arg {
     name: "value"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/FractionalAvgPoolGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FractionalAvgPoolGrad.pbtxt
index 56028951959e56..cceb2fe903ab05 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FractionalAvgPoolGrad.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FractionalAvgPoolGrad.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FractionalAvgPoolGrad"
   input_arg {
     name: "orig_input_tensor_shape"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/FractionalMaxPool.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FractionalMaxPool.pbtxt
index 4360d2356fc979..a11b4ef05f1bc9 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FractionalMaxPool.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FractionalMaxPool.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FractionalMaxPool"
   input_arg {
     name: "value"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/FractionalMaxPoolGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FractionalMaxPoolGrad.pbtxt
index 517e9117186358..711e98a5df1479 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FractionalMaxPoolGrad.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FractionalMaxPoolGrad.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FractionalMaxPoolGrad"
   input_arg {
     name: "orig_input"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/FresnelCos.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FresnelCos.pbtxt
index 19d0bf180e584b..7be5bbcb2ff8d5 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FresnelCos.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FresnelCos.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FresnelCos"
   input_arg {
     name: "x"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/FresnelSin.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FresnelSin.pbtxt
index a05ab41d50ac8e..c8c91ba6a68b8d 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FresnelSin.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FresnelSin.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FresnelSin"
   input_arg {
     name: "x"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/FusedBatchNorm.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FusedBatchNorm.pbtxt
index f2480179c00962..e5ac169b31ef96 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FusedBatchNorm.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FusedBatchNorm.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FusedBatchNorm"
   input_arg {
     name: "x"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/FusedBatchNormGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FusedBatchNormGrad.pbtxt
index 9aabfafdd18be8..bff7eecf0ce852 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FusedBatchNormGrad.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FusedBatchNormGrad.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FusedBatchNormGrad"
   input_arg {
     name: "y_backprop"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/FusedBatchNormGradV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FusedBatchNormGradV2.pbtxt
index 78e2509276d1e5..dea20af8afcc6e 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FusedBatchNormGradV2.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FusedBatchNormGradV2.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FusedBatchNormGradV2"
   input_arg {
     name: "y_backprop"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/FusedBatchNormGradV3.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FusedBatchNormGradV3.pbtxt
index 2d2d3234b91df4..aa05a575bfeec0 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FusedBatchNormGradV3.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FusedBatchNormGradV3.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FusedBatchNormGradV3"
   input_arg {
     name: "y_backprop"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/FusedBatchNormV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FusedBatchNormV2.pbtxt
index 1f8fe62eb8b782..99f482fc721d6d 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FusedBatchNormV2.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FusedBatchNormV2.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FusedBatchNormV2"
   input_arg {
     name: "x"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/FusedBatchNormV3.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FusedBatchNormV3.pbtxt
index 1afdb5c8875269..b1f608dbe3659d 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FusedBatchNormV3.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FusedBatchNormV3.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FusedBatchNormV3"
   input_arg {
     name: "x"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/FusedPadConv2D.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FusedPadConv2D.pbtxt
index b4142aea4fbbf9..1a89c018c4e059 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FusedPadConv2D.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FusedPadConv2D.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FusedPadConv2D"
   input_arg {
     name: "input"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/FusedResizeAndPadConv2D.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/FusedResizeAndPadConv2D.pbtxt
index b5870f695e766a..cfc716fa1d695c 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/FusedResizeAndPadConv2D.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/FusedResizeAndPadConv2D.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "FusedResizeAndPadConv2D"
   input_arg {
     name: "input"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/GRUBlockCell.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/GRUBlockCell.pbtxt
index 7c4519f6d7191f..7c0dd9d5fcff40 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/GRUBlockCell.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/GRUBlockCell.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "GRUBlockCell"
   input_arg {
     name: "x"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/GRUBlockCellGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/GRUBlockCellGrad.pbtxt
index a8b3b505af37d7..723bcbd0b6f49c 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/GRUBlockCellGrad.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/GRUBlockCellGrad.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "GRUBlockCellGrad"
   input_arg {
     name: "x"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/Gather.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Gather.pbtxt
index 37b2ae11c7eeb4..264a8366bb82b8 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/Gather.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/Gather.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "Gather"
   input_arg {
     name: "params"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/GatherNd.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/GatherNd.pbtxt
index 78376f9baff7b4..5ec2fd92ae606e 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/GatherNd.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/GatherNd.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "GatherNd"
   input_arg {
     name: "params"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/GatherV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/GatherV2.pbtxt
index 607cb850411e0d..891e82e5fb3ed6 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/GatherV2.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/GatherV2.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "GatherV2"
   input_arg {
     name: "params"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/GenerateBoundingBoxProposals.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/GenerateBoundingBoxProposals.pbtxt
index 6cb47345d8ccb7..adbc9d4c8e00f3 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/GenerateBoundingBoxProposals.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/GenerateBoundingBoxProposals.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "GenerateBoundingBoxProposals"
   input_arg {
     name: "scores"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/GenerateVocabRemapping.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/GenerateVocabRemapping.pbtxt
index adb2f799c542d0..a095253dbb20b4 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/GenerateVocabRemapping.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/GenerateVocabRemapping.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "GenerateVocabRemapping"
   input_arg {
     name: "new_vocab_file"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/GeneratorDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/GeneratorDataset.pbtxt
index b9ac804e012177..9f8da9c542648e 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/GeneratorDataset.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/GeneratorDataset.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "GeneratorDataset"
   input_arg {
     name: "init_func_other_args"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/GetElementAtIndex.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/GetElementAtIndex.pbtxt
index 22b27a5530c826..82121301bd9fbb 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/GetElementAtIndex.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/GetElementAtIndex.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "GetElementAtIndex"
   input_arg {
     name: "dataset"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/GetMinibatchSplitsWithPhysicalReplica.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/GetMinibatchSplitsWithPhysicalReplica.pbtxt
index 764a8982073313..85bc30bf6dc098 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/GetMinibatchSplitsWithPhysicalReplica.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/GetMinibatchSplitsWithPhysicalReplica.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "GetMinibatchSplitsWithPhysicalReplica"
   input_arg {
     name: "program_key"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/GetMinibatchesInCsrWithPhysicalReplica.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/GetMinibatchesInCsrWithPhysicalReplica.pbtxt
index 45c3b531422fc9..79632c44322921 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/GetMinibatchesInCsrWithPhysicalReplica.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/GetMinibatchesInCsrWithPhysicalReplica.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "GetMinibatchesInCsrWithPhysicalReplica"
   input_arg {
     name: "program_key"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/GetOptions.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/GetOptions.pbtxt
index 1562e89627eddd..f8f161238d5630 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/GetOptions.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/GetOptions.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "GetOptions"
   input_arg {
     name: "input_dataset"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/GetSessionHandle.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/GetSessionHandle.pbtxt
index 90e66c7bd59690..e5345ec6f0ea1f 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/GetSessionHandle.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/GetSessionHandle.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "GetSessionHandle"
   input_arg {
     name: "value"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/GetSessionHandleV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/GetSessionHandleV2.pbtxt
index 55dcf277a9e465..60405234b168b4 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/GetSessionHandleV2.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/GetSessionHandleV2.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "GetSessionHandleV2"
   input_arg {
     name: "value"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/GetSessionTensor.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/GetSessionTensor.pbtxt
index fef3d08c0a7b9f..5c4cf8af9c55dc 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/GetSessionTensor.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/GetSessionTensor.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "GetSessionTensor"
   input_arg {
     name: "handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/GetStatsFromListOfSparseCoreCooTensors.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/GetStatsFromListOfSparseCoreCooTensors.pbtxt
index 8a9eb07d6259e5..f2ea7cfeb6ed88 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/GetStatsFromListOfSparseCoreCooTensors.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/GetStatsFromListOfSparseCoreCooTensors.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "GetStatsFromListOfSparseCoreCooTensors"
   input_arg {
     name: "row_ids_list"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/GetTpuTaskId.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/GetTpuTaskId.pbtxt
index dab64d5df42503..a5c46e67ad3e0e 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/GetTpuTaskId.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/GetTpuTaskId.pbtxt
@@ -4,5 +4,4 @@ op {
     name: "tpu_task_id"
     type: DT_INT32
   }
-  is_stateful: false
 }
diff --git a/tensorflow/core/ops/compat/ops_history_v2/GlobalIterId.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/GlobalIterId.pbtxt
index 5fa2302622c9ac..9dcfcb49a6d0cb 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/GlobalIterId.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/GlobalIterId.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "GlobalIterId"
   output_arg {
     name: "iter_id"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/GlobalShuffleDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/GlobalShuffleDataset.pbtxt
index 131281a80ec590..0bce0beea54070 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/GlobalShuffleDataset.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/GlobalShuffleDataset.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "GlobalShuffleDataset"
   input_arg {
     name: "input_dataset"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/Greater.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Greater.pbtxt
index bbf5f46aab7d53..8860e3c0c1097c 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/Greater.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/Greater.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "Greater"
   input_arg {
     name: "x"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/GreaterEqual.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/GreaterEqual.pbtxt
index db2bbd30ff2ce3..5bcdd3789c789a 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/GreaterEqual.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/GreaterEqual.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "GreaterEqual"
   input_arg {
     name: "x"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/GroupByReducerDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/GroupByReducerDataset.pbtxt
index d550546eb64bd1..320e628f8aabba 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/GroupByReducerDataset.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/GroupByReducerDataset.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "GroupByReducerDataset"
   input_arg {
     name: "input_dataset"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/GroupByWindowDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/GroupByWindowDataset.pbtxt
index da3bfd2542624d..0de0de53d0e7d3 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/GroupByWindowDataset.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/GroupByWindowDataset.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "GroupByWindowDataset"
   input_arg {
     name: "input_dataset"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/GuaranteeConst.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/GuaranteeConst.pbtxt
index 9ec864b74c024b..71d47e3758090e 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/GuaranteeConst.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/GuaranteeConst.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "GuaranteeConst"
   input_arg {
     name: "input"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/HSVToRGB.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/HSVToRGB.pbtxt
index 6169e00a62058a..2b209cc6547ab6 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/HSVToRGB.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/HSVToRGB.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "HSVToRGB"
   input_arg {
     name: "images"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/HashTable.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/HashTable.pbtxt
index eb3a57246ed277..83afe2b9448103 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/HashTable.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/HashTable.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "HashTable"
   output_arg {
     name: "table_handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/HashTableV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/HashTableV2.pbtxt
index ea6383d9f392b7..24a9bc7176d8ae 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/HashTableV2.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/HashTableV2.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "HashTableV2"
   output_arg {
     name: "table_handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/HistogramFixedWidth.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/HistogramFixedWidth.pbtxt
index 201df5b3467a1c..f39eabe4f72506 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/HistogramFixedWidth.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/HistogramFixedWidth.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "HistogramFixedWidth"
   input_arg {
     name: "values"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/HistogramSummary.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/HistogramSummary.pbtxt
index 6b5c4c9df1303a..0c46f397972bf6 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/HistogramSummary.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/HistogramSummary.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "HistogramSummary"
   input_arg {
     name: "tag"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/HostConst.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/HostConst.pbtxt
index 81975b57d5e37d..6dd4c1757073fd 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/HostConst.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/HostConst.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "HostConst"
   output_arg {
     name: "output"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/IFFT.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/IFFT.pbtxt
index a4c712b70cdf89..8571a132950cb1 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/IFFT.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/IFFT.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "IFFT"
   input_arg {
     name: "input"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/IFFT2D.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/IFFT2D.pbtxt
index f079604a4bec8c..0b208d46939354 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/IFFT2D.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/IFFT2D.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "IFFT2D"
   input_arg {
     name: "input"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/IFFT3D.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/IFFT3D.pbtxt
index bba5431e81392a..8b9667f882c67c 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/IFFT3D.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/IFFT3D.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "IFFT3D"
   input_arg {
     name: "input"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/IFFTND.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/IFFTND.pbtxt
index 72956229f0cde9..0dfb1fc9c51904 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/IFFTND.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/IFFTND.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "IFFTND"
   input_arg {
     name: "input"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/IRFFT.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/IRFFT.pbtxt
index 1a42324399eeb1..8ac3dfc979d8ef 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/IRFFT.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/IRFFT.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "IRFFT"
   input_arg {
     name: "input"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/IRFFT2D.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/IRFFT2D.pbtxt
index e348e4e8cbdf39..5d1f872d605e32 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/IRFFT2D.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/IRFFT2D.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "IRFFT2D"
   input_arg {
     name: "input"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/IRFFT3D.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/IRFFT3D.pbtxt
index 3a6fa0da90e102..b69417ee1a7e57 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/IRFFT3D.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/IRFFT3D.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "IRFFT3D"
   input_arg {
     name: "input"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/IRFFTND.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/IRFFTND.pbtxt
index c1d1da300962db..175092d5aa8158 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/IRFFTND.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/IRFFTND.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "IRFFTND"
   input_arg {
     name: "input"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/Identity.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Identity.pbtxt
index 7646ed8ddb0964..f3ca3dbd24324a 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/Identity.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/Identity.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "Identity"
   input_arg {
     name: "input"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/IdentityN.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/IdentityN.pbtxt
index aedd30ebafdddb..61c3b63279003c 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/IdentityN.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/IdentityN.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "IdentityN"
   input_arg {
     name: "input"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/IdentityReader.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/IdentityReader.pbtxt
index 9e1aa0e11e219d..3330154b4d60ca 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/IdentityReader.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/IdentityReader.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "IdentityReader"
   output_arg {
     name: "reader_handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/IdentityReaderV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/IdentityReaderV2.pbtxt
index 27c72561ec1364..f37e9cedab961a 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/IdentityReaderV2.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/IdentityReaderV2.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "IdentityReaderV2"
   output_arg {
     name: "reader_handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/If.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/If.pbtxt
index 595f6002939acb..7ccb12afa61896 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/If.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/If.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "If"
   input_arg {
     name: "cond"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/Igamma.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Igamma.pbtxt
index 821fa48275501f..d356f5ec5816c6 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/Igamma.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/Igamma.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "Igamma"
   input_arg {
     name: "a"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/IgammaGradA.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/IgammaGradA.pbtxt
index 739385e8c4da4f..964067de5dcb69 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/IgammaGradA.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/IgammaGradA.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "IgammaGradA"
   input_arg {
     name: "a"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/Igammac.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Igammac.pbtxt
index 1cfa801aeeb09a..cdf44f684bbdfc 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/Igammac.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/Igammac.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "Igammac"
   input_arg {
     name: "a"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/IgnoreErrorsDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/IgnoreErrorsDataset.pbtxt
index 94fed463e57f7c..32af3bb466528e 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/IgnoreErrorsDataset.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/IgnoreErrorsDataset.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "IgnoreErrorsDataset"
   input_arg {
     name: "input_dataset"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/Imag.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Imag.pbtxt
index f8c3ce95533bf1..1444b0c60b05f6 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/Imag.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/Imag.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "Imag"
   input_arg {
     name: "input"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ImageProjectiveTransformV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ImageProjectiveTransformV2.pbtxt
index bd58faf8e16445..891b8b1cb7a6cc 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ImageProjectiveTransformV2.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ImageProjectiveTransformV2.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ImageProjectiveTransformV2"
   input_arg {
     name: "images"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ImageProjectiveTransformV3.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ImageProjectiveTransformV3.pbtxt
index 92887b2cc18151..115ee4269e7490 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ImageProjectiveTransformV3.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ImageProjectiveTransformV3.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ImageProjectiveTransformV3"
   input_arg {
     name: "images"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ImageSummary.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ImageSummary.pbtxt
index cbe4a0123f2eda..fafd7173195e3d 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ImageSummary.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ImageSummary.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ImageSummary"
   input_arg {
     name: "tag"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ImmutableConst.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ImmutableConst.pbtxt
index 51bb4050b9f65c..ba1180951f7083 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ImmutableConst.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ImmutableConst.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ImmutableConst"
   output_arg {
     name: "tensor"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ImportEvent.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ImportEvent.pbtxt
index d80a4f171a4090..7be31dd0ae5844 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ImportEvent.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ImportEvent.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ImportEvent"
   input_arg {
     name: "writer"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/InTopK.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/InTopK.pbtxt
index 881ead0a3631b5..6acd3b62e91d28 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/InTopK.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/InTopK.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "InTopK"
   input_arg {
     name: "predictions"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/InTopKV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/InTopKV2.pbtxt
index 003655ca052f60..a6ca2b83a45a37 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/InTopKV2.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/InTopKV2.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "InTopKV2"
   input_arg {
     name: "predictions"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/IndexFlatMapDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/IndexFlatMapDataset.pbtxt
index 348e21901d1743..e28bead11f8c03 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/IndexFlatMapDataset.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/IndexFlatMapDataset.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "IndexFlatMapDataset"
   input_arg {
     name: "input_dataset"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/InfeedDequeue.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/InfeedDequeue.pbtxt
index 595fbefc51366c..a48d840da663f3 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/InfeedDequeue.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/InfeedDequeue.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "InfeedDequeue"
   output_arg {
     name: "output"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/InfeedDequeueTuple.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/InfeedDequeueTuple.pbtxt
index e3c4cdc4a0f5bb..dc6ab2b0b66476 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/InfeedDequeueTuple.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/InfeedDequeueTuple.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "InfeedDequeueTuple"
   output_arg {
     name: "outputs"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/InfeedEnqueue.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/InfeedEnqueue.pbtxt
index f62d220e778769..759b91401e9de0 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/InfeedEnqueue.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/InfeedEnqueue.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "InfeedEnqueue"
   input_arg {
     name: "input"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/InfeedEnqueuePrelinearizedBuffer.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/InfeedEnqueuePrelinearizedBuffer.pbtxt
index e6dfae7dcea10c..d281b700bd4aad 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/InfeedEnqueuePrelinearizedBuffer.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/InfeedEnqueuePrelinearizedBuffer.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "InfeedEnqueuePrelinearizedBuffer"
   input_arg {
     name: "input"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/InfeedEnqueueTuple.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/InfeedEnqueueTuple.pbtxt
index f1339fc123d0b6..459c5d9218fd6a 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/InfeedEnqueueTuple.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/InfeedEnqueueTuple.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "InfeedEnqueueTuple"
   input_arg {
     name: "inputs"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/InitializeTable.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/InitializeTable.pbtxt
index 571557348711b2..35a46a99c24987 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/InitializeTable.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/InitializeTable.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "InitializeTable"
   input_arg {
     name: "table_handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/InitializeTableFromDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/InitializeTableFromDataset.pbtxt
index 69dc6753d84f1a..fe0ec4d4b176c2 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/InitializeTableFromDataset.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/InitializeTableFromDataset.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "InitializeTableFromDataset"
   input_arg {
     name: "table_handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/InitializeTableFromTextFile.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/InitializeTableFromTextFile.pbtxt
index 08c63cbe8d4ced..77be4cadf98245 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/InitializeTableFromTextFile.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/InitializeTableFromTextFile.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "InitializeTableFromTextFile"
   input_arg {
     name: "table_handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/InitializeTableFromTextFileV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/InitializeTableFromTextFileV2.pbtxt
index 69b41a04dc866e..6593434f2a6f4d 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/InitializeTableFromTextFileV2.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/InitializeTableFromTextFileV2.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "InitializeTableFromTextFileV2"
   input_arg {
     name: "table_handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/InitializeTableV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/InitializeTableV2.pbtxt
index 6e7aeb5f95946e..62c565902faf69 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/InitializeTableV2.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/InitializeTableV2.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "InitializeTableV2"
   input_arg {
     name: "table_handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/InplaceAdd.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/InplaceAdd.pbtxt
index 31799cddf4fee9..7c6685770b7f3f 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/InplaceAdd.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/InplaceAdd.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "InplaceAdd"
   input_arg {
     name: "x"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/InplaceSub.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/InplaceSub.pbtxt
index 28754d0755e050..42d6c14a586c49 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/InplaceSub.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/InplaceSub.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "InplaceSub"
   input_arg {
     name: "x"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/InplaceUpdate.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/InplaceUpdate.pbtxt
index 448bff4d381669..94b7f24aecc2ca 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/InplaceUpdate.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/InplaceUpdate.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "InplaceUpdate"
   input_arg {
     name: "x"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/InterleaveDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/InterleaveDataset.pbtxt
index 9a2505d5540d19..124a84cf82fc55 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/InterleaveDataset.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/InterleaveDataset.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "InterleaveDataset"
   input_arg {
     name: "input_dataset"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/Inv.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Inv.pbtxt
index 30ed71735243f7..0c191790030e8d 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/Inv.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/Inv.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "Inv"
   input_arg {
     name: "x"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/InvGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/InvGrad.pbtxt
index 1c1d4971bf43f5..af882a90b23f58 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/InvGrad.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/InvGrad.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "InvGrad"
   input_arg {
     name: "x"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/Invert.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Invert.pbtxt
index 6915b9d38fd471..cd9c81231790c4 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/Invert.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/Invert.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "Invert"
   input_arg {
     name: "x"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/InvertPermutation.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/InvertPermutation.pbtxt
index 74c7d64b10ba81..fa028961e3220c 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/InvertPermutation.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/InvertPermutation.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "InvertPermutation"
   input_arg {
     name: "x"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/IsBoostedTreesEnsembleInitialized.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/IsBoostedTreesEnsembleInitialized.pbtxt
index 4086fdf15d3ffa..1b19fef0df2c93 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/IsBoostedTreesEnsembleInitialized.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/IsBoostedTreesEnsembleInitialized.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "IsBoostedTreesEnsembleInitialized"
   input_arg {
     name: "tree_ensemble_handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/IsBoostedTreesQuantileStreamResourceInitialized.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/IsBoostedTreesQuantileStreamResourceInitialized.pbtxt
index 868a34f393b315..359e0e9ba5798c 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/IsBoostedTreesQuantileStreamResourceInitialized.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/IsBoostedTreesQuantileStreamResourceInitialized.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "IsBoostedTreesQuantileStreamResourceInitialized"
   input_arg {
     name: "quantile_stream_resource_handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/IsFinite.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/IsFinite.pbtxt
index d3051d645ba2b1..8410dce0cb011f 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/IsFinite.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/IsFinite.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "IsFinite"
   input_arg {
     name: "x"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/IsInf.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/IsInf.pbtxt
index 20604d6e930955..1ce6c74691e3d6 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/IsInf.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/IsInf.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "IsInf"
   input_arg {
     name: "x"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/IsNan.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/IsNan.pbtxt
index bc6c66435d00e0..826f2fff6c507d 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/IsNan.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/IsNan.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "IsNan"
   input_arg {
     name: "x"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/IsTPUEmbeddingInitialized.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/IsTPUEmbeddingInitialized.pbtxt
index 6a6a893bfddcd4..bbfe80cc97ac21 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/IsTPUEmbeddingInitialized.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/IsTPUEmbeddingInitialized.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "IsTPUEmbeddingInitialized"
   output_arg {
     name: "is_tpu_embedding_initialized"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/IsVariableInitialized.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/IsVariableInitialized.pbtxt
index c61e3263a6aeb9..03496db8d32030 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/IsVariableInitialized.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/IsVariableInitialized.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "IsVariableInitialized"
   input_arg {
     name: "ref"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/IsotonicRegression.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/IsotonicRegression.pbtxt
index 3c9050f67f2607..abe6fb4bbd849a 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/IsotonicRegression.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/IsotonicRegression.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "IsotonicRegression"
   input_arg {
     name: "input"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/Iterator.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Iterator.pbtxt
index a9b8b66ee19046..76b9fdef4ed26e 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/Iterator.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/Iterator.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "Iterator"
   output_arg {
     name: "handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/IteratorFromStringHandle.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/IteratorFromStringHandle.pbtxt
index e4964182ad4259..ebd34378194d2d 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/IteratorFromStringHandle.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/IteratorFromStringHandle.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "IteratorFromStringHandle"
   input_arg {
     name: "string_handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/IteratorFromStringHandleV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/IteratorFromStringHandleV2.pbtxt
index 3faf956c0b6030..624c47394db730 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/IteratorFromStringHandleV2.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/IteratorFromStringHandleV2.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "IteratorFromStringHandleV2"
   input_arg {
     name: "string_handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/IteratorGetDevice.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/IteratorGetDevice.pbtxt
index 02020169c71f71..8d379c1557b2ab 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/IteratorGetDevice.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/IteratorGetDevice.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "IteratorGetDevice"
   input_arg {
     name: "resource"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/IteratorGetModelProto.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/IteratorGetModelProto.pbtxt
index ee47f9d7d43634..b1343becfa9bd6 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/IteratorGetModelProto.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/IteratorGetModelProto.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "IteratorGetModelProto"
   input_arg {
     name: "iterator"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/IteratorGetNext.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/IteratorGetNext.pbtxt
index daddc26f37be32..f204011ed431b7 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/IteratorGetNext.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/IteratorGetNext.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "IteratorGetNext"
   input_arg {
     name: "iterator"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/IteratorGetNextAsOptional.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/IteratorGetNextAsOptional.pbtxt
index 5cbd8314b9f0e7..c1a532f165c6f0 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/IteratorGetNextAsOptional.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/IteratorGetNextAsOptional.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "IteratorGetNextAsOptional"
   input_arg {
     name: "iterator"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/IteratorGetNextSync.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/IteratorGetNextSync.pbtxt
index c7f29d2773cfad..e1a7351d2da357 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/IteratorGetNextSync.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/IteratorGetNextSync.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "IteratorGetNextSync"
   input_arg {
     name: "iterator"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/IteratorToStringHandle.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/IteratorToStringHandle.pbtxt
index 47fa0764034917..87f2dffc941b41 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/IteratorToStringHandle.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/IteratorToStringHandle.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "IteratorToStringHandle"
   input_arg {
     name: "resource_handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/IteratorV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/IteratorV2.pbtxt
index 244fe9d0a49c62..6f7ab705485da4 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/IteratorV2.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/IteratorV2.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "IteratorV2"
   output_arg {
     name: "handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/KMC2ChainInitialization.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/KMC2ChainInitialization.pbtxt
index f695595658365b..e9640975b0e685 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/KMC2ChainInitialization.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/KMC2ChainInitialization.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "KMC2ChainInitialization"
   input_arg {
     name: "distances"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/KmeansPlusPlusInitialization.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/KmeansPlusPlusInitialization.pbtxt
index ea5914ba22f17c..27ab4b34885bc9 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/KmeansPlusPlusInitialization.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/KmeansPlusPlusInitialization.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "KmeansPlusPlusInitialization"
   input_arg {
     name: "points"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/KthOrderStatistic.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/KthOrderStatistic.pbtxt
index 3b0ca9d2d07479..8e5b79cec0442b 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/KthOrderStatistic.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/KthOrderStatistic.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "KthOrderStatistic"
   input_arg {
     name: "input"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/L2Loss.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/L2Loss.pbtxt
index 1e72b270d39aee..90e8619d09f2e7 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/L2Loss.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/L2Loss.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "L2Loss"
   input_arg {
     name: "t"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/LMDBDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LMDBDataset.pbtxt
index 6609f2d64fd84f..9ba1bd98191f8e 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/LMDBDataset.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/LMDBDataset.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "LMDBDataset"
   input_arg {
     name: "filenames"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/LMDBReader.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LMDBReader.pbtxt
index b6f52544c52965..967c74bb72c778 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/LMDBReader.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/LMDBReader.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "LMDBReader"
   output_arg {
     name: "reader_handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/LRN.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LRN.pbtxt
index aba0c94025dd81..75880682c31830 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/LRN.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/LRN.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "LRN"
   input_arg {
     name: "input"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/LRNGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LRNGrad.pbtxt
index 65a6c221aaeff0..37db775eaa236b 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/LRNGrad.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/LRNGrad.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "LRNGrad"
   input_arg {
     name: "input_grads"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/LSTMBlockCell.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LSTMBlockCell.pbtxt
index 88643e69b1c1ae..f1071f7fc51699 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/LSTMBlockCell.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/LSTMBlockCell.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "LSTMBlockCell"
   input_arg {
     name: "x"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/LSTMBlockCellGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LSTMBlockCellGrad.pbtxt
index 1851e5bd6e6bfb..b20d47c5c01ef5 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/LSTMBlockCellGrad.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/LSTMBlockCellGrad.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "LSTMBlockCellGrad"
   input_arg {
     name: "x"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/LatencyStatsDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LatencyStatsDataset.pbtxt
index cdb1716e83521e..546bcdcbd1233e 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/LatencyStatsDataset.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/LatencyStatsDataset.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "LatencyStatsDataset"
   input_arg {
     name: "input_dataset"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/LeakyRelu.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LeakyRelu.pbtxt
index 4cbb4fdf427b38..c0358f96a87f10 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/LeakyRelu.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/LeakyRelu.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "LeakyRelu"
   input_arg {
     name: "features"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/LeakyReluGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LeakyReluGrad.pbtxt
index 524638322a3f62..786872202c456d 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/LeakyReluGrad.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/LeakyReluGrad.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "LeakyReluGrad"
   input_arg {
     name: "gradients"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/LearnedUnigramCandidateSampler.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LearnedUnigramCandidateSampler.pbtxt
index cce62fa5d0cc4b..71466c56726990 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/LearnedUnigramCandidateSampler.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/LearnedUnigramCandidateSampler.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "LearnedUnigramCandidateSampler"
   input_arg {
     name: "true_classes"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/LeftShift.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LeftShift.pbtxt
index 94088ea60d6d12..c3f56bee3bb47a 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/LeftShift.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/LeftShift.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "LeftShift"
   input_arg {
     name: "x"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/LegacyParallelInterleaveDatasetV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LegacyParallelInterleaveDatasetV2.pbtxt
index 6d4d712d004182..49f6a5574721c8 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/LegacyParallelInterleaveDatasetV2.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/LegacyParallelInterleaveDatasetV2.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "LegacyParallelInterleaveDatasetV2"
   input_arg {
     name: "input_dataset"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/Less.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Less.pbtxt
index 818d92f302140e..e4f12455aa5ae3 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/Less.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/Less.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "Less"
   input_arg {
     name: "x"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/LessEqual.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LessEqual.pbtxt
index 477d8bf1d92fdd..9162a684069fd8 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/LessEqual.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/LessEqual.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "LessEqual"
   input_arg {
     name: "x"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/Lgamma.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Lgamma.pbtxt
index f43959c6afe82a..fcb0241217bdfa 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/Lgamma.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/Lgamma.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "Lgamma"
   input_arg {
     name: "x"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/LinSpace.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LinSpace.pbtxt
index ad58235a544266..044ba244206f96 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/LinSpace.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/LinSpace.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "LinSpace"
   input_arg {
     name: "start"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ListDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ListDataset.pbtxt
index c095f6d1194705..2180d6d82a474c 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ListDataset.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ListDataset.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ListDataset"
   input_arg {
     name: "tensors"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ListDiff.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ListDiff.pbtxt
index 090baff95aefa0..39c3ee8606ccfd 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ListDiff.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ListDiff.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ListDiff"
   input_arg {
     name: "x"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ListSnapshotChunksDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ListSnapshotChunksDataset.pbtxt
index be35470141fb28..1b667052f0e069 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ListSnapshotChunksDataset.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ListSnapshotChunksDataset.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ListSnapshotChunksDataset"
   input_arg {
     name: "snapshot_path"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/LoadAllTPUEmbeddingParameters.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LoadAllTPUEmbeddingParameters.pbtxt
index 68a146348b2758..5ac0c1cfb2b345 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/LoadAllTPUEmbeddingParameters.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/LoadAllTPUEmbeddingParameters.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "LoadAllTPUEmbeddingParameters"
   input_arg {
     name: "parameters"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/LoadAndRemapMatrix.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LoadAndRemapMatrix.pbtxt
index be16dec44ee721..54b4a68f2b344b 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/LoadAndRemapMatrix.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/LoadAndRemapMatrix.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "LoadAndRemapMatrix"
   input_arg {
     name: "ckpt_path"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/LoadDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LoadDataset.pbtxt
index 41e306f86429a9..c46f54ef3c53a1 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/LoadDataset.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/LoadDataset.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "LoadDataset"
   input_arg {
     name: "path"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingADAMParameters.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingADAMParameters.pbtxt
index 38aec474b28889..5294493f6d1f30 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingADAMParameters.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingADAMParameters.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "LoadTPUEmbeddingADAMParameters"
   input_arg {
     name: "parameters"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingAdadeltaParameters.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingAdadeltaParameters.pbtxt
index c8d55510e0c983..c093049ee0d2f3 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingAdadeltaParameters.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingAdadeltaParameters.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "LoadTPUEmbeddingAdadeltaParameters"
   input_arg {
     name: "parameters"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingAdagradMomentumParameters.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingAdagradMomentumParameters.pbtxt
index be33451441efe9..82f2cb2161fc7a 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingAdagradMomentumParameters.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingAdagradMomentumParameters.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "LoadTPUEmbeddingAdagradMomentumParameters"
   input_arg {
     name: "parameters"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingAdagradParameters.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingAdagradParameters.pbtxt
index b4325420e97e5d..485ec861a1eae4 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingAdagradParameters.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingAdagradParameters.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "LoadTPUEmbeddingAdagradParameters"
   input_arg {
     name: "parameters"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingCenteredRMSPropParameters.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingCenteredRMSPropParameters.pbtxt
index 42b53f9cf2e60c..72c02df3763afb 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingCenteredRMSPropParameters.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingCenteredRMSPropParameters.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "LoadTPUEmbeddingCenteredRMSPropParameters"
   input_arg {
     name: "parameters"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingFTRLParameters.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingFTRLParameters.pbtxt
index 0bceec9c7897de..a429697249129a 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingFTRLParameters.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingFTRLParameters.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "LoadTPUEmbeddingFTRLParameters"
   input_arg {
     name: "parameters"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingFrequencyEstimatorParameters.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingFrequencyEstimatorParameters.pbtxt
index 2485f0c96ca4f5..ab6af2875d5dc1 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingFrequencyEstimatorParameters.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingFrequencyEstimatorParameters.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "LoadTPUEmbeddingFrequencyEstimatorParameters"
   input_arg {
     name: "parameters"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingMDLAdagradLightParameters.pbtxt
b/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingMDLAdagradLightParameters.pbtxt index 2140a2b84b2b59..a175817a6e931d 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingMDLAdagradLightParameters.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingMDLAdagradLightParameters.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "LoadTPUEmbeddingMDLAdagradLightParameters" input_arg { name: "parameters" diff --git a/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingMomentumParameters.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingMomentumParameters.pbtxt index 3da833e5595b2a..0f135f7c6f0e25 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingMomentumParameters.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingMomentumParameters.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "LoadTPUEmbeddingMomentumParameters" input_arg { name: "parameters" diff --git a/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingProximalAdagradParameters.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingProximalAdagradParameters.pbtxt index c272880303b6ad..10d611a38b2e78 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingProximalAdagradParameters.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingProximalAdagradParameters.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "LoadTPUEmbeddingProximalAdagradParameters" input_arg { name: "parameters" diff --git a/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingProximalYogiParameters.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingProximalYogiParameters.pbtxt index 7feae76245ed14..8f51ed094ae8e1 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingProximalYogiParameters.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingProximalYogiParameters.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "LoadTPUEmbeddingProximalYogiParameters" input_arg { name: "parameters" diff --git a/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingRMSPropParameters.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingRMSPropParameters.pbtxt index 711f9e56dfe055..cfec4be6aa7d1c 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingRMSPropParameters.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingRMSPropParameters.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "LoadTPUEmbeddingRMSPropParameters" input_arg { name: "parameters" diff --git a/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingStochasticGradientDescentParameters.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingStochasticGradientDescentParameters.pbtxt index a61ee8b1e4bc4e..48b965b4ea3691 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingStochasticGradientDescentParameters.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/LoadTPUEmbeddingStochasticGradientDescentParameters.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "LoadTPUEmbeddingStochasticGradientDescentParameters" input_arg { name: "parameters" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Log.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Log.pbtxt index 7e5b08a05a1282..a16862c0735f93 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Log.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Log.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Log" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Log1p.pbtxt 
b/tensorflow/core/ops/compat/ops_history_v2/Log1p.pbtxt index b1f686eebe6982..1f8ba12957e726 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Log1p.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Log1p.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Log1p" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/LogMatrixDeterminant.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LogMatrixDeterminant.pbtxt index aee2342ce070aa..3807cdda42595e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/LogMatrixDeterminant.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/LogMatrixDeterminant.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "LogMatrixDeterminant" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/LogSoftmax.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LogSoftmax.pbtxt index 59748cdb10238b..92d2727bbe7e23 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/LogSoftmax.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/LogSoftmax.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "LogSoftmax" input_arg { name: "logits" diff --git a/tensorflow/core/ops/compat/ops_history_v2/LogUniformCandidateSampler.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LogUniformCandidateSampler.pbtxt index 1066189f03b1b3..9ec45571159093 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/LogUniformCandidateSampler.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/LogUniformCandidateSampler.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "LogUniformCandidateSampler" input_arg { name: "true_classes" diff --git a/tensorflow/core/ops/compat/ops_history_v2/LogicalAnd.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LogicalAnd.pbtxt index bdad772540320e..b10b115df4f23f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/LogicalAnd.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/LogicalAnd.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "LogicalAnd" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/LogicalNot.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LogicalNot.pbtxt index cba489e4022795..5cf13ad8399c7b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/LogicalNot.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/LogicalNot.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "LogicalNot" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/LogicalOr.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LogicalOr.pbtxt index 9306d3d95b357a..635a66d8ba0634 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/LogicalOr.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/LogicalOr.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "LogicalOr" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/LookupTableExport.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LookupTableExport.pbtxt index f083b589d1cd21..6c56cdeb1de748 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/LookupTableExport.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/LookupTableExport.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "LookupTableExport" input_arg { name: "table_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/LookupTableExportV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LookupTableExportV2.pbtxt index 264b3867d11698..b86fd3a32ad47b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/LookupTableExportV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/LookupTableExportV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "LookupTableExportV2" input_arg { name: "table_handle" diff --git 
a/tensorflow/core/ops/compat/ops_history_v2/LookupTableFind.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LookupTableFind.pbtxt index e9e33182da56de..5923b502abcb15 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/LookupTableFind.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/LookupTableFind.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "LookupTableFind" input_arg { name: "table_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/LookupTableFindV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LookupTableFindV2.pbtxt index 4fc069f0e48d0d..53cbafbeee082f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/LookupTableFindV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/LookupTableFindV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "LookupTableFindV2" input_arg { name: "table_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/LookupTableImport.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LookupTableImport.pbtxt index c3184809ed3962..73b53a5b6f5784 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/LookupTableImport.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/LookupTableImport.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "LookupTableImport" input_arg { name: "table_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/LookupTableImportV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LookupTableImportV2.pbtxt index aa684ec6b5bcb2..41c03b83c71ba1 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/LookupTableImportV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/LookupTableImportV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "LookupTableImportV2" input_arg { name: "table_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/LookupTableInsert.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LookupTableInsert.pbtxt index b33cb4837a7bfc..b96cb478887922 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/LookupTableInsert.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/LookupTableInsert.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "LookupTableInsert" input_arg { name: "table_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/LookupTableInsertV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LookupTableInsertV2.pbtxt index c2e5b1a3cb448c..19d7d49b8600a3 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/LookupTableInsertV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/LookupTableInsertV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "LookupTableInsertV2" input_arg { name: "table_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/LookupTableRemoveV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LookupTableRemoveV2.pbtxt index 325b4316da3810..d7fe0bb4dd4e40 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/LookupTableRemoveV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/LookupTableRemoveV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "LookupTableRemoveV2" input_arg { name: "table_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/LookupTableSize.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LookupTableSize.pbtxt index a878f30b9ea764..0d4bf61189fa33 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/LookupTableSize.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/LookupTableSize.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "LookupTableSize" input_arg { name: "table_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/LookupTableSizeV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LookupTableSizeV2.pbtxt index 
6fb6f47f945262..511beedff01e3b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/LookupTableSizeV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/LookupTableSizeV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "LookupTableSizeV2" input_arg { name: "table_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/LoopCond.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LoopCond.pbtxt index 3be4701ef48460..7111fff007b5f2 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/LoopCond.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/LoopCond.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "LoopCond" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/LowerBound.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/LowerBound.pbtxt index 934c879c0907dc..b7d1dee7797707 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/LowerBound.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/LowerBound.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "LowerBound" input_arg { name: "sorted_inputs" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Lu.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Lu.pbtxt index a5140b8af7a87e..59c28e09e704d4 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Lu.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Lu.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Lu" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MakeIterator.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MakeIterator.pbtxt index 84e12f49dba0e6..b11c2b9e1dd1b1 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MakeIterator.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MakeIterator.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MakeIterator" input_arg { name: "dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MakeUnique.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MakeUnique.pbtxt index a6dbebb1e588a9..685f52d66eaead 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MakeUnique.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MakeUnique.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MakeUnique" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MapAndBatchDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MapAndBatchDataset.pbtxt index d4e5d9a29a79f2..8e7b6a32493801 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MapAndBatchDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MapAndBatchDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MapAndBatchDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MapClear.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MapClear.pbtxt index 261e8ba9b6f5b0..22c5e5fcad020b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MapClear.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MapClear.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MapClear" attr { name: "capacity" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MapDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MapDataset.pbtxt index 6840222b360148..b01b535e48d6fd 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MapDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MapDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MapDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MapDefun.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MapDefun.pbtxt index 243d9866d0bbe0..7cb9d19231cfd9 100644 --- 
a/tensorflow/core/ops/compat/ops_history_v2/MapDefun.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MapDefun.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MapDefun" input_arg { name: "arguments" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MapIncompleteSize.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MapIncompleteSize.pbtxt index b6c12953967540..ca9c629887fa58 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MapIncompleteSize.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MapIncompleteSize.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MapIncompleteSize" output_arg { name: "size" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MapPeek.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MapPeek.pbtxt index 13973572d1cf49..4a61cb9e40f47c 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MapPeek.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MapPeek.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MapPeek" input_arg { name: "key" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MapSize.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MapSize.pbtxt index 5bffb06eb1ea59..6828f8fbb09a3f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MapSize.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MapSize.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MapSize" output_arg { name: "size" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MapStage.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MapStage.pbtxt index c34737c065777f..4ad2131a1f1844 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MapStage.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MapStage.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MapStage" input_arg { name: "key" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MapUnstage.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MapUnstage.pbtxt index f288ab5f51cdb2..9901130961c8a1 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MapUnstage.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MapUnstage.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MapUnstage" input_arg { name: "key" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MapUnstageNoKey.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MapUnstageNoKey.pbtxt index ce11b85c5063cb..ee4cca51346065 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MapUnstageNoKey.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MapUnstageNoKey.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MapUnstageNoKey" input_arg { name: "indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MatMul.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MatMul.pbtxt index 8f79fa11000f7f..42a2c794353672 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MatMul.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MatMul.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MatMul" input_arg { name: "a" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MatchingFiles.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MatchingFiles.pbtxt index e374694de6f10a..3f8af5f3226375 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MatchingFiles.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MatchingFiles.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MatchingFiles" input_arg { name: "pattern" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MatchingFilesDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MatchingFilesDataset.pbtxt index 916f7e1e60a1c5..bf43730d6dc87f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MatchingFilesDataset.pbtxt +++ 
b/tensorflow/core/ops/compat/ops_history_v2/MatchingFilesDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MatchingFilesDataset" input_arg { name: "patterns" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MatrixBandPart.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MatrixBandPart.pbtxt index c6ab4921aa4f70..c25aa9615c51d1 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MatrixBandPart.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MatrixBandPart.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MatrixBandPart" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MatrixDeterminant.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MatrixDeterminant.pbtxt index 791c7f7a5afc57..4dd524d4894ed4 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MatrixDeterminant.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MatrixDeterminant.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MatrixDeterminant" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MatrixDiag.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MatrixDiag.pbtxt index 299678fef206a3..9b0ddb0285d0cd 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MatrixDiag.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MatrixDiag.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MatrixDiag" input_arg { name: "diagonal" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MatrixDiagPart.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MatrixDiagPart.pbtxt index 4952fc6b2f2acc..efb1e18fccba39 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MatrixDiagPart.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MatrixDiagPart.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MatrixDiagPart" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MatrixDiagPartV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MatrixDiagPartV2.pbtxt index 3325a0d4069336..f709c6d5eb556c 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MatrixDiagPartV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MatrixDiagPartV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MatrixDiagPartV2" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MatrixDiagPartV3.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MatrixDiagPartV3.pbtxt index c7df214d46ed30..75a1307f279142 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MatrixDiagPartV3.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MatrixDiagPartV3.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MatrixDiagPartV3" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MatrixDiagV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MatrixDiagV2.pbtxt index 4fafe31b76cc93..3f6aa1e6a72fa5 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MatrixDiagV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MatrixDiagV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MatrixDiagV2" input_arg { name: "diagonal" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MatrixDiagV3.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MatrixDiagV3.pbtxt index c8829be1bc41f8..793efcec3b6425 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MatrixDiagV3.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MatrixDiagV3.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MatrixDiagV3" input_arg { name: "diagonal" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MatrixExponential.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MatrixExponential.pbtxt index d173fbe0515836..008291a4cafd64 100644 
--- a/tensorflow/core/ops/compat/ops_history_v2/MatrixExponential.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MatrixExponential.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MatrixExponential" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MatrixInverse.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MatrixInverse.pbtxt index 6b3befc4dd031f..81d35ad1d082ae 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MatrixInverse.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MatrixInverse.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MatrixInverse" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MatrixLogarithm.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MatrixLogarithm.pbtxt index c3cc81e38c6cb0..0a87e5905d0123 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MatrixLogarithm.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MatrixLogarithm.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MatrixLogarithm" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MatrixSetDiag.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MatrixSetDiag.pbtxt index 0dcfca6102cc1b..e8c08f8d295192 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MatrixSetDiag.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MatrixSetDiag.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MatrixSetDiag" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MatrixSetDiagV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MatrixSetDiagV2.pbtxt index c11740fbd69a34..1147220c00c774 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MatrixSetDiagV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MatrixSetDiagV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MatrixSetDiagV2" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MatrixSetDiagV3.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MatrixSetDiagV3.pbtxt index 1490839cf0b3f6..d5a6af8b119291 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MatrixSetDiagV3.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MatrixSetDiagV3.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MatrixSetDiagV3" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MatrixSolve.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MatrixSolve.pbtxt index 50cf802d9ff263..2a28fa0adb0377 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MatrixSolve.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MatrixSolve.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MatrixSolve" input_arg { name: "matrix" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MatrixSolveLs.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MatrixSolveLs.pbtxt index 082064ef822021..5df48fc28e3b29 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MatrixSolveLs.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MatrixSolveLs.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MatrixSolveLs" input_arg { name: "matrix" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MatrixSquareRoot.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MatrixSquareRoot.pbtxt index b7e690181cad3e..32ff859e8f9826 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MatrixSquareRoot.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MatrixSquareRoot.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MatrixSquareRoot" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MatrixTriangularSolve.pbtxt 
b/tensorflow/core/ops/compat/ops_history_v2/MatrixTriangularSolve.pbtxt index e925e1189322b4..915e582d00a0e1 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MatrixTriangularSolve.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MatrixTriangularSolve.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MatrixTriangularSolve" input_arg { name: "matrix" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Max.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Max.pbtxt index 91262d8e435aee..bf147acf0f405f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Max.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Max.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Max" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MaxIntraOpParallelismDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MaxIntraOpParallelismDataset.pbtxt index 946f2f18f84161..85547917acc6e3 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MaxIntraOpParallelismDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MaxIntraOpParallelismDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MaxIntraOpParallelismDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MaxPool.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MaxPool.pbtxt index 2fd8174576359a..f4fd1cccf29f7d 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MaxPool.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MaxPool.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MaxPool" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MaxPool3D.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MaxPool3D.pbtxt index 928c2c0aeaf32b..7af4fca0e93e15 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MaxPool3D.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MaxPool3D.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MaxPool3D" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MaxPool3DGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MaxPool3DGrad.pbtxt index 8e8f10e4fe80e7..77edcb4c89887a 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MaxPool3DGrad.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MaxPool3DGrad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MaxPool3DGrad" input_arg { name: "orig_input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MaxPool3DGradGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MaxPool3DGradGrad.pbtxt index 5012c312b1bf1f..55d26c13c9cab9 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MaxPool3DGradGrad.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MaxPool3DGradGrad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MaxPool3DGradGrad" input_arg { name: "orig_input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MaxPoolGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MaxPoolGrad.pbtxt index 2a36894500bdd8..131a3633cf98f7 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MaxPoolGrad.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MaxPoolGrad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MaxPoolGrad" input_arg { name: "orig_input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MaxPoolGradGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MaxPoolGradGrad.pbtxt index 9589f707f4c481..9b1f4de08ea069 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MaxPoolGradGrad.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MaxPoolGradGrad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MaxPoolGradGrad" input_arg { name: "orig_input" diff --git 
a/tensorflow/core/ops/compat/ops_history_v2/MaxPoolGradGradV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MaxPoolGradGradV2.pbtxt index c716043ae9ac56..fba1ab57dc6b28 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MaxPoolGradGradV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MaxPoolGradGradV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MaxPoolGradGradV2" input_arg { name: "orig_input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MaxPoolGradGradWithArgmax.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MaxPoolGradGradWithArgmax.pbtxt index dc96f707c66e8a..3c3cdbb90d1287 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MaxPoolGradGradWithArgmax.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MaxPoolGradGradWithArgmax.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MaxPoolGradGradWithArgmax" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MaxPoolGradV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MaxPoolGradV2.pbtxt index 42d63ab79b1e60..7e38cf840dd0e0 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MaxPoolGradV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MaxPoolGradV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MaxPoolGradV2" input_arg { name: "orig_input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MaxPoolGradWithArgmax.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MaxPoolGradWithArgmax.pbtxt index 1d30e8703369a6..7c3ab4a0cd05c6 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MaxPoolGradWithArgmax.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MaxPoolGradWithArgmax.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MaxPoolGradWithArgmax" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MaxPoolV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MaxPoolV2.pbtxt index 82dc586d6d487d..3ef7da8d9d9848 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MaxPoolV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MaxPoolV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MaxPoolV2" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MaxPoolWithArgmax.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MaxPoolWithArgmax.pbtxt index 0a4a4df9c8135d..d33bbd2f70769e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MaxPoolWithArgmax.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MaxPoolWithArgmax.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MaxPoolWithArgmax" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Maximum.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Maximum.pbtxt index 32bbcb645478ef..c6ee10114c63f0 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Maximum.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Maximum.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Maximum" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Mean.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Mean.pbtxt index 10769852ec7d10..e0b5f145616867 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Mean.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Mean.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Mean" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Merge.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Merge.pbtxt index 23610f501a46a0..d08f9cc55e94f0 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Merge.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Merge.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Merge" input_arg { name: 
"inputs" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MergeDedupData.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MergeDedupData.pbtxt index 147341c8f374af..a5bcb48c34a0f1 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MergeDedupData.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MergeDedupData.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MergeDedupData" input_arg { name: "integer_tensor" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MergeSummary.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MergeSummary.pbtxt index ffe893e53bfa8a..d9b14d4511093e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MergeSummary.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MergeSummary.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MergeSummary" input_arg { name: "inputs" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MergeV2Checkpoints.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MergeV2Checkpoints.pbtxt index 96b092e55660ac..2a6b60b4e531c9 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MergeV2Checkpoints.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MergeV2Checkpoints.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MergeV2Checkpoints" input_arg { name: "checkpoint_prefixes" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Mfcc.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Mfcc.pbtxt index 8f4577250c9f3d..4c22eb8c69fe03 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Mfcc.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Mfcc.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Mfcc" input_arg { name: "spectrogram" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Min.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Min.pbtxt index bb0cd548c8a13a..4959b5e8d583b9 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Min.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Min.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Min" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Minimum.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Minimum.pbtxt index 756451b0b748d9..01cc483ba54dcb 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Minimum.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Minimum.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Minimum" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MirrorPad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MirrorPad.pbtxt index 0642f97fafe52a..bf64a6ca5040b1 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MirrorPad.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MirrorPad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MirrorPad" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MirrorPadGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MirrorPadGrad.pbtxt index d1503a556c6127..b544cfbe72e7c4 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MirrorPadGrad.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MirrorPadGrad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MirrorPadGrad" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MlirPassthroughOp.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MlirPassthroughOp.pbtxt index 4f7ddeb99acaf0..5990eb2850281d 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MlirPassthroughOp.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MlirPassthroughOp.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MlirPassthroughOp" input_arg { name: "inputs" diff --git 
a/tensorflow/core/ops/compat/ops_history_v2/Mod.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Mod.pbtxt index 37a302eaa25de1..6c39ed683f6bee 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Mod.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Mod.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Mod" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ModelDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ModelDataset.pbtxt index d3228c40e1d4dc..14ac940f6a7764 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ModelDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ModelDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ModelDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Mul.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Mul.pbtxt index d13b3adae90bd6..ef592669ff62cd 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Mul.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Mul.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Mul" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MulNoNan.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MulNoNan.pbtxt index ba06844a75700e..ca5c92fb15bfde 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MulNoNan.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MulNoNan.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MulNoNan" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MultiDeviceIterator.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MultiDeviceIterator.pbtxt index d7067ebf6eab43..d85c553f186f3b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MultiDeviceIterator.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MultiDeviceIterator.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MultiDeviceIterator" output_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MultiDeviceIteratorFromStringHandle.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MultiDeviceIteratorFromStringHandle.pbtxt index 74acb5eb9ce0c9..384b1477dbc599 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MultiDeviceIteratorFromStringHandle.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MultiDeviceIteratorFromStringHandle.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MultiDeviceIteratorFromStringHandle" input_arg { name: "string_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MultiDeviceIteratorGetNextFromShard.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MultiDeviceIteratorGetNextFromShard.pbtxt index 8cf4fb6c28514c..2e007c25b2337d 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MultiDeviceIteratorGetNextFromShard.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MultiDeviceIteratorGetNextFromShard.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MultiDeviceIteratorGetNextFromShard" input_arg { name: "multi_device_iterator" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MultiDeviceIteratorInit.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MultiDeviceIteratorInit.pbtxt index a2829e514c5beb..a011997186af14 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MultiDeviceIteratorInit.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MultiDeviceIteratorInit.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MultiDeviceIteratorInit" input_arg { name: "dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MultiDeviceIteratorToStringHandle.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MultiDeviceIteratorToStringHandle.pbtxt index 
6f332755692ba1..d7780d79687166 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MultiDeviceIteratorToStringHandle.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MultiDeviceIteratorToStringHandle.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MultiDeviceIteratorToStringHandle" input_arg { name: "multi_device_iterator" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Multinomial.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Multinomial.pbtxt index ee55af8dddac95..c258fa6e7ed4c8 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Multinomial.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Multinomial.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Multinomial" input_arg { name: "logits" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MutableDenseHashTable.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MutableDenseHashTable.pbtxt index 4ebb385704164c..eecaeb2d4fb570 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MutableDenseHashTable.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MutableDenseHashTable.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MutableDenseHashTable" input_arg { name: "empty_key" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MutableDenseHashTableV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MutableDenseHashTableV2.pbtxt index 861d25c0c1b3f4..739079ced16cb6 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MutableDenseHashTableV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MutableDenseHashTableV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MutableDenseHashTableV2" input_arg { name: "empty_key" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MutableHashTable.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MutableHashTable.pbtxt index c1f2e216da5b83..a8ecc34cb18bd0 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MutableHashTable.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MutableHashTable.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MutableHashTable" output_arg { name: "table_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MutableHashTableOfTensors.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MutableHashTableOfTensors.pbtxt index 7b71665fb96804..bdec2ff5939b39 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MutableHashTableOfTensors.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MutableHashTableOfTensors.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MutableHashTableOfTensors" output_arg { name: "table_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MutableHashTableOfTensorsV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MutableHashTableOfTensorsV2.pbtxt index d32880aba77e28..dc46d075df3d94 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MutableHashTableOfTensorsV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MutableHashTableOfTensorsV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MutableHashTableOfTensorsV2" output_arg { name: "table_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MutableHashTableV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MutableHashTableV2.pbtxt index eb690ee06e1aa9..610214dfa76e66 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MutableHashTableV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MutableHashTableV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MutableHashTableV2" output_arg { name: "table_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MutexLock.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MutexLock.pbtxt index 16f1ad6e0a1049..6b5747cd4e383e 
100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MutexLock.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MutexLock.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MutexLock" input_arg { name: "mutex" diff --git a/tensorflow/core/ops/compat/ops_history_v2/MutexV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MutexV2.pbtxt index ef04c075cd8d6e..b20f9b1e7996b3 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/MutexV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/MutexV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "MutexV2" output_arg { name: "resource" diff --git a/tensorflow/core/ops/compat/ops_history_v2/NcclAllReduce.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/NcclAllReduce.pbtxt index 59ab7c672eb6cc..80f91edef1d237 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/NcclAllReduce.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/NcclAllReduce.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "NcclAllReduce" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/NcclBroadcast.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/NcclBroadcast.pbtxt index 46a2cab258fbf0..02a5487d1acf6f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/NcclBroadcast.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/NcclBroadcast.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "NcclBroadcast" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/NcclReduce.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/NcclReduce.pbtxt index 141189947600f3..507f92cff2c87e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/NcclReduce.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/NcclReduce.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "NcclReduce" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Ndtri.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Ndtri.pbtxt index c1549d77e8e841..a7a923f780db40 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Ndtri.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Ndtri.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Ndtri" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/NearestNeighbors.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/NearestNeighbors.pbtxt index 42b7b7c906bd85..5d1e5ed57659ac 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/NearestNeighbors.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/NearestNeighbors.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "NearestNeighbors" input_arg { name: "points" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Neg.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Neg.pbtxt index 22a09654a9b7c3..864d0257fe4b2d 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Neg.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Neg.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Neg" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/NegTrain.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/NegTrain.pbtxt index d97f107378d3b9..f12529fd6328b8 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/NegTrain.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/NegTrain.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "NegTrain" input_arg { name: "w_in" diff --git a/tensorflow/core/ops/compat/ops_history_v2/NextAfter.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/NextAfter.pbtxt index 1cbacb54fe2bec..70e4afe6c77db5 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/NextAfter.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/NextAfter.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: 
"NextAfter" input_arg { name: "x1" diff --git a/tensorflow/core/ops/compat/ops_history_v2/NextIteration.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/NextIteration.pbtxt index 6044c37879b4d2..7186fc0b684029 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/NextIteration.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/NextIteration.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "NextIteration" input_arg { name: "data" diff --git a/tensorflow/core/ops/compat/ops_history_v2/NoOp.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/NoOp.pbtxt index 6c06e127e1bd6b..8f0370633fc1ce 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/NoOp.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/NoOp.pbtxt @@ -1,3 +1,3 @@ -op { +op { name: "NoOp" } diff --git a/tensorflow/core/ops/compat/ops_history_v2/NonDeterministicInts.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/NonDeterministicInts.pbtxt index 470bdf155b510a..3fa5aa4a605c7d 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/NonDeterministicInts.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/NonDeterministicInts.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "NonDeterministicInts" input_arg { name: "shape" diff --git a/tensorflow/core/ops/compat/ops_history_v2/NonMaxSuppression.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/NonMaxSuppression.pbtxt index a63ddf63dbac33..ded8b3728f4613 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/NonMaxSuppression.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/NonMaxSuppression.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "NonMaxSuppression" input_arg { name: "boxes" diff --git a/tensorflow/core/ops/compat/ops_history_v2/NonMaxSuppressionV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/NonMaxSuppressionV2.pbtxt index 0ff1ffcb5ae632..90c23bc0457dcc 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/NonMaxSuppressionV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/NonMaxSuppressionV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "NonMaxSuppressionV2" input_arg { name: "boxes" diff --git a/tensorflow/core/ops/compat/ops_history_v2/NonMaxSuppressionV3.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/NonMaxSuppressionV3.pbtxt index bf9aac7c28f2c5..daeffd841b8e00 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/NonMaxSuppressionV3.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/NonMaxSuppressionV3.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "NonMaxSuppressionV3" input_arg { name: "boxes" diff --git a/tensorflow/core/ops/compat/ops_history_v2/NonMaxSuppressionV4.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/NonMaxSuppressionV4.pbtxt index ee1f325a9bd8d6..07ca92fef71df1 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/NonMaxSuppressionV4.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/NonMaxSuppressionV4.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "NonMaxSuppressionV4" input_arg { name: "boxes" diff --git a/tensorflow/core/ops/compat/ops_history_v2/NonMaxSuppressionV5.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/NonMaxSuppressionV5.pbtxt index fbfe05a13141a1..cabec767a15617 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/NonMaxSuppressionV5.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/NonMaxSuppressionV5.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "NonMaxSuppressionV5" input_arg { name: "boxes" diff --git a/tensorflow/core/ops/compat/ops_history_v2/NonMaxSuppressionWithOverlaps.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/NonMaxSuppressionWithOverlaps.pbtxt index a150d688af1b69..d89eeee4a4a34b 100644 --- 
a/tensorflow/core/ops/compat/ops_history_v2/NonMaxSuppressionWithOverlaps.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/NonMaxSuppressionWithOverlaps.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "NonMaxSuppressionWithOverlaps" input_arg { name: "overlaps" diff --git a/tensorflow/core/ops/compat/ops_history_v2/NonSerializableDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/NonSerializableDataset.pbtxt index b47c4157f9dbde..5fbd4bdb656c57 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/NonSerializableDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/NonSerializableDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "NonSerializableDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/NotEqual.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/NotEqual.pbtxt index 738fc0de0114d8..099ef75d622d83 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/NotEqual.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/NotEqual.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "NotEqual" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/NthElement.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/NthElement.pbtxt index a1c7a12ba00126..c9e797273df119 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/NthElement.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/NthElement.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "NthElement" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/OneHot.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/OneHot.pbtxt index 83163e8685a19e..0c1cfb6d686a95 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/OneHot.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/OneHot.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "OneHot" input_arg { name: "indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/OneShotIterator.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/OneShotIterator.pbtxt index 362e5b271b172e..a2969bcc0e36c9 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/OneShotIterator.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/OneShotIterator.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "OneShotIterator" output_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/OnesLike.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/OnesLike.pbtxt index 6827c87d832660..d4609438139b35 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/OnesLike.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/OnesLike.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "OnesLike" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/OptimizeDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/OptimizeDataset.pbtxt index 4a2b3f7420fd6c..5b40b213eb5dc8 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/OptimizeDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/OptimizeDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "OptimizeDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/OptimizeDatasetV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/OptimizeDatasetV2.pbtxt index d647c7bc1f0d58..7e1537ebe57a3a 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/OptimizeDatasetV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/OptimizeDatasetV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "OptimizeDatasetV2" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/OptionalFromValue.pbtxt 
b/tensorflow/core/ops/compat/ops_history_v2/OptionalFromValue.pbtxt index 3a9510a674c426..ddfc9c43d348c3 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/OptionalFromValue.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/OptionalFromValue.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "OptionalFromValue" input_arg { name: "components" diff --git a/tensorflow/core/ops/compat/ops_history_v2/OptionalGetValue.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/OptionalGetValue.pbtxt index e5f3ab817ab570..e7364a1014afe8 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/OptionalGetValue.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/OptionalGetValue.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "OptionalGetValue" input_arg { name: "optional" diff --git a/tensorflow/core/ops/compat/ops_history_v2/OptionalHasValue.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/OptionalHasValue.pbtxt index e744d908129750..da76333cecbf70 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/OptionalHasValue.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/OptionalHasValue.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "OptionalHasValue" input_arg { name: "optional" diff --git a/tensorflow/core/ops/compat/ops_history_v2/OptionalNone.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/OptionalNone.pbtxt index a051d978224361..c47d6a745481e9 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/OptionalNone.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/OptionalNone.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "OptionalNone" output_arg { name: "optional" diff --git a/tensorflow/core/ops/compat/ops_history_v2/OptionsDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/OptionsDataset.pbtxt index 6e3c0a9fdb22a7..fc63e5ee2951cf 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/OptionsDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/OptionsDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "OptionsDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/OrderedMapClear.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/OrderedMapClear.pbtxt index 2f77ea5e73bbb4..726e26e6172f05 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/OrderedMapClear.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/OrderedMapClear.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "OrderedMapClear" attr { name: "capacity" diff --git a/tensorflow/core/ops/compat/ops_history_v2/OrderedMapIncompleteSize.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/OrderedMapIncompleteSize.pbtxt index a6439051fe23a0..9a9572a51beea8 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/OrderedMapIncompleteSize.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/OrderedMapIncompleteSize.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "OrderedMapIncompleteSize" output_arg { name: "size" diff --git a/tensorflow/core/ops/compat/ops_history_v2/OrderedMapPeek.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/OrderedMapPeek.pbtxt index 8e61ac23f9bb73..0d9fd20fe077dd 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/OrderedMapPeek.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/OrderedMapPeek.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "OrderedMapPeek" input_arg { name: "key" diff --git a/tensorflow/core/ops/compat/ops_history_v2/OrderedMapSize.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/OrderedMapSize.pbtxt index 46777f1d7e7e8e..ea07d7e4215fb8 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/OrderedMapSize.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/OrderedMapSize.pbtxt @@ -1,4 
+1,4 @@ -op { +op { name: "OrderedMapSize" output_arg { name: "size" diff --git a/tensorflow/core/ops/compat/ops_history_v2/OrderedMapStage.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/OrderedMapStage.pbtxt index badbf44b424839..76af456ed8372b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/OrderedMapStage.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/OrderedMapStage.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "OrderedMapStage" input_arg { name: "key" diff --git a/tensorflow/core/ops/compat/ops_history_v2/OrderedMapUnstage.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/OrderedMapUnstage.pbtxt index f3d4dced87d04e..c09b4be94f4908 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/OrderedMapUnstage.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/OrderedMapUnstage.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "OrderedMapUnstage" input_arg { name: "key" diff --git a/tensorflow/core/ops/compat/ops_history_v2/OrderedMapUnstageNoKey.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/OrderedMapUnstageNoKey.pbtxt index 295f8258d599c4..bc3e8c7da30200 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/OrderedMapUnstageNoKey.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/OrderedMapUnstageNoKey.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "OrderedMapUnstageNoKey" input_arg { name: "indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/OutfeedDequeue.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/OutfeedDequeue.pbtxt index b802b78283c0ee..29dc8b5c5879bd 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/OutfeedDequeue.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/OutfeedDequeue.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "OutfeedDequeue" output_arg { name: "output" diff --git a/tensorflow/core/ops/compat/ops_history_v2/OutfeedDequeueTuple.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/OutfeedDequeueTuple.pbtxt index 580babfb502d44..3e0d31078b3ec6 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/OutfeedDequeueTuple.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/OutfeedDequeueTuple.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "OutfeedDequeueTuple" output_arg { name: "outputs" diff --git a/tensorflow/core/ops/compat/ops_history_v2/OutfeedDequeueTupleV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/OutfeedDequeueTupleV2.pbtxt index e8d13591681564..744744b4545cad 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/OutfeedDequeueTupleV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/OutfeedDequeueTupleV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "OutfeedDequeueTupleV2" input_arg { name: "device_ordinal" diff --git a/tensorflow/core/ops/compat/ops_history_v2/OutfeedDequeueV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/OutfeedDequeueV2.pbtxt index 5863aae467fed3..c5ca1f31a676cb 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/OutfeedDequeueV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/OutfeedDequeueV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "OutfeedDequeueV2" input_arg { name: "device_ordinal" diff --git a/tensorflow/core/ops/compat/ops_history_v2/OutfeedEnqueue.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/OutfeedEnqueue.pbtxt index 4836db6b12b31f..d8c16f4d62978c 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/OutfeedEnqueue.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/OutfeedEnqueue.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "OutfeedEnqueue" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/OutfeedEnqueueTuple.pbtxt 
b/tensorflow/core/ops/compat/ops_history_v2/OutfeedEnqueueTuple.pbtxt index e7464383175911..0bf1a5ba4804eb 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/OutfeedEnqueueTuple.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/OutfeedEnqueueTuple.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "OutfeedEnqueueTuple" input_arg { name: "inputs" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Pack.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Pack.pbtxt index 12eecb122fbe6a..65eb67509d3fd8 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Pack.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Pack.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Pack" input_arg { name: "values" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Pad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Pad.pbtxt index 5d1b80e2f3976c..1c7b9c7b457b3c 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Pad.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Pad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Pad" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/PadV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/PadV2.pbtxt index aa1a0fb109be72..463cb71f207ed0 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/PadV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/PadV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "PadV2" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/PaddedBatchDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/PaddedBatchDataset.pbtxt index f67cb9f4e4e5af..a118fc102f10a2 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/PaddedBatchDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/PaddedBatchDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "PaddedBatchDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/PaddedBatchDatasetV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/PaddedBatchDatasetV2.pbtxt index 1142cbf9eb5bed..4ae5a66624a65d 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/PaddedBatchDatasetV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/PaddedBatchDatasetV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "PaddedBatchDatasetV2" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/PaddingFIFOQueue.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/PaddingFIFOQueue.pbtxt index e03ca8fecfec5f..f5eca52ba1927e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/PaddingFIFOQueue.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/PaddingFIFOQueue.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "PaddingFIFOQueue" output_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/PaddingFIFOQueueV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/PaddingFIFOQueueV2.pbtxt index 12404b10303679..c398f9ee3a8a14 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/PaddingFIFOQueueV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/PaddingFIFOQueueV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "PaddingFIFOQueueV2" output_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ParallelBatchDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ParallelBatchDataset.pbtxt index 023d855a4fa2fd..5c160cae2ddad7 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ParallelBatchDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ParallelBatchDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ParallelBatchDataset" input_arg { name: "input_dataset" diff --git 
a/tensorflow/core/ops/compat/ops_history_v2/ParallelConcat.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ParallelConcat.pbtxt index 154f48829d78d4..b0d1cc3918574a 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ParallelConcat.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ParallelConcat.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ParallelConcat" input_arg { name: "values" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ParallelDynamicStitch.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ParallelDynamicStitch.pbtxt index 8566fc274f031d..9ab18a1ba5e69e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ParallelDynamicStitch.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ParallelDynamicStitch.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ParallelDynamicStitch" input_arg { name: "indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ParallelFilterDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ParallelFilterDataset.pbtxt index e33694113b5105..1c895e2767636e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ParallelFilterDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ParallelFilterDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ParallelFilterDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ParallelInterleaveDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ParallelInterleaveDataset.pbtxt index 730f9b76b490be..f278cb0efc6dfb 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ParallelInterleaveDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ParallelInterleaveDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ParallelInterleaveDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ParallelInterleaveDatasetV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ParallelInterleaveDatasetV2.pbtxt index 507463da15bb97..110573b42ed39f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ParallelInterleaveDatasetV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ParallelInterleaveDatasetV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ParallelInterleaveDatasetV2" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ParallelInterleaveDatasetV3.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ParallelInterleaveDatasetV3.pbtxt index 3f73fc38abc0d0..096460fb1efcdb 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ParallelInterleaveDatasetV3.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ParallelInterleaveDatasetV3.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ParallelInterleaveDatasetV3" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ParallelInterleaveDatasetV4.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ParallelInterleaveDatasetV4.pbtxt index 65d63780827683..94f9ae0f6eebd1 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ParallelInterleaveDatasetV4.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ParallelInterleaveDatasetV4.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ParallelInterleaveDatasetV4" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ParallelMapDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ParallelMapDataset.pbtxt index 20ac8a4dcc3d51..991e4192983c3d 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ParallelMapDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ParallelMapDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ParallelMapDataset" 
input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ParallelMapDatasetV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ParallelMapDatasetV2.pbtxt index 87012c3c49f3d6..55e73b740adefd 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ParallelMapDatasetV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ParallelMapDatasetV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ParallelMapDatasetV2" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ParameterizedTruncatedNormal.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ParameterizedTruncatedNormal.pbtxt index 8bca662ec52d00..1f96da6f7886e6 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ParameterizedTruncatedNormal.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ParameterizedTruncatedNormal.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ParameterizedTruncatedNormal" input_arg { name: "shape" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ParseExample.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ParseExample.pbtxt index b1020d356343db..a1e35bde86b0a1 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ParseExample.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ParseExample.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ParseExample" input_arg { name: "serialized" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ParseExampleDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ParseExampleDataset.pbtxt index 4e12e94a6a28c8..4dc9ac1efb6cd3 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ParseExampleDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ParseExampleDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ParseExampleDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ParseExampleDatasetV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ParseExampleDatasetV2.pbtxt index 8af77a0bd59095..59632a160b121c 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ParseExampleDatasetV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ParseExampleDatasetV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ParseExampleDatasetV2" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ParseExampleV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ParseExampleV2.pbtxt index cc2706b046e368..0d7e1d37e23ac9 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ParseExampleV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ParseExampleV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ParseExampleV2" input_arg { name: "serialized" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ParseSequenceExample.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ParseSequenceExample.pbtxt index 8060fe90a3553e..03ac5be8d26160 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ParseSequenceExample.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ParseSequenceExample.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ParseSequenceExample" input_arg { name: "serialized" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ParseSequenceExampleV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ParseSequenceExampleV2.pbtxt index 07dfc9d7467e14..7ba59734893b0b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ParseSequenceExampleV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ParseSequenceExampleV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ParseSequenceExampleV2" input_arg { name: "serialized" diff --git 
a/tensorflow/core/ops/compat/ops_history_v2/ParseSingleExample.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ParseSingleExample.pbtxt index 0ea9857ac83b6f..aaa69af4f62ac6 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ParseSingleExample.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ParseSingleExample.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ParseSingleExample" input_arg { name: "serialized" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ParseSingleSequenceExample.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ParseSingleSequenceExample.pbtxt index c0f76a20b44160..a0f52dbdd1f406 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ParseSingleSequenceExample.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ParseSingleSequenceExample.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ParseSingleSequenceExample" input_arg { name: "serialized" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ParseTensor.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ParseTensor.pbtxt index 20232f99165c8d..63d1f1292eed08 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ParseTensor.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ParseTensor.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ParseTensor" input_arg { name: "serialized" diff --git a/tensorflow/core/ops/compat/ops_history_v2/PartitionedCall.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/PartitionedCall.pbtxt index 59752220042a63..b51bd1de9fce0d 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/PartitionedCall.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/PartitionedCall.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "PartitionedCall" input_arg { name: "args" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Placeholder.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Placeholder.pbtxt index ec0fdcf19f7ebd..7c0f57a94e177d 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Placeholder.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Placeholder.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Placeholder" output_arg { name: "output" diff --git a/tensorflow/core/ops/compat/ops_history_v2/PlaceholderV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/PlaceholderV2.pbtxt index e9d5b4840cb8e0..b2cd20b238fa5e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/PlaceholderV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/PlaceholderV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "PlaceholderV2" output_arg { name: "output" diff --git a/tensorflow/core/ops/compat/ops_history_v2/PlaceholderWithDefault.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/PlaceholderWithDefault.pbtxt index 3d02d762312073..79a2ffb4492a6a 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/PlaceholderWithDefault.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/PlaceholderWithDefault.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "PlaceholderWithDefault" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Polygamma.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Polygamma.pbtxt index 3f20bc10cd2332..6bf0d9ba4cf9cc 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Polygamma.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Polygamma.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Polygamma" input_arg { name: "a" diff --git a/tensorflow/core/ops/compat/ops_history_v2/PopulationCount.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/PopulationCount.pbtxt index 97e98373418383..d66c1ac00ae122 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/PopulationCount.pbtxt +++ 
b/tensorflow/core/ops/compat/ops_history_v2/PopulationCount.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "PopulationCount" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Pow.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Pow.pbtxt index ad30b536cd7742..b1cc1b8f479801 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Pow.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Pow.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Pow" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/PrefetchDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/PrefetchDataset.pbtxt index 92926f6523d1ff..81953a0e75c0c9 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/PrefetchDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/PrefetchDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "PrefetchDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Prelinearize.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Prelinearize.pbtxt index f74c9b381dce7d..b5ed810c25a426 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Prelinearize.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Prelinearize.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Prelinearize" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/PrelinearizeTuple.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/PrelinearizeTuple.pbtxt index 92de7f372a74d5..bb1ae7d3e2f828 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/PrelinearizeTuple.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/PrelinearizeTuple.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "PrelinearizeTuple" input_arg { name: "inputs" diff --git a/tensorflow/core/ops/compat/ops_history_v2/PreventGradient.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/PreventGradient.pbtxt index 19a95b09d5e645..1649fc808aa935 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/PreventGradient.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/PreventGradient.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "PreventGradient" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Print.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Print.pbtxt index 1966093e81c6ad..fbbb514b177737 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Print.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Print.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Print" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/PrintV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/PrintV2.pbtxt index 8f748465b810bd..c5942f0a6145a5 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/PrintV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/PrintV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "PrintV2" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/PriorityQueue.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/PriorityQueue.pbtxt index af0f36d238d9e6..b44d83dfb20036 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/PriorityQueue.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/PriorityQueue.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "PriorityQueue" output_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/PriorityQueueV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/PriorityQueueV2.pbtxt index ab426238988baf..a4e7c750b65a14 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/PriorityQueueV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/PriorityQueueV2.pbtxt @@ 
-1,4 +1,4 @@ -op { +op { name: "PriorityQueueV2" output_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/PrivateThreadPoolDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/PrivateThreadPoolDataset.pbtxt index 483f4b88fd93f7..c16c1eb164728d 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/PrivateThreadPoolDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/PrivateThreadPoolDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "PrivateThreadPoolDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Prod.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Prod.pbtxt index d3c71eacd78cf9..fe9126bca16ed1 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Prod.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Prod.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Prod" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/PyFunc.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/PyFunc.pbtxt index de5661de7e2150..987f028051ea0d 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/PyFunc.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/PyFunc.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "PyFunc" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/PyFuncStateless.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/PyFuncStateless.pbtxt index fa2ac3e446b1a7..2a587d53d937c8 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/PyFuncStateless.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/PyFuncStateless.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "PyFuncStateless" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Qr.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Qr.pbtxt index 251f8ff7f7f0d6..8319528f8a016e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Qr.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Qr.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Qr" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizeAndDequantize.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizeAndDequantize.pbtxt index 30d0b750a8df0a..fb662f7057c2f0 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizeAndDequantize.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizeAndDequantize.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizeAndDequantize" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizeAndDequantizeV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizeAndDequantizeV2.pbtxt index d1fa4fba7fd4e5..d946e753cf7bbf 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizeAndDequantizeV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizeAndDequantizeV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizeAndDequantizeV2" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizeAndDequantizeV3.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizeAndDequantizeV3.pbtxt index 8d6a0317190cfe..cb7762a9deda20 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizeAndDequantizeV3.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizeAndDequantizeV3.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizeAndDequantizeV3" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizeAndDequantizeV4.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizeAndDequantizeV4.pbtxt index fca137de6cd4bc..2a49131faaf2d8 100644 --- 
a/tensorflow/core/ops/compat/ops_history_v2/QuantizeAndDequantizeV4.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizeAndDequantizeV4.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizeAndDequantizeV4" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizeAndDequantizeV4Grad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizeAndDequantizeV4Grad.pbtxt index 71df7854231502..0bbe87452b145d 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizeAndDequantizeV4Grad.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizeAndDequantizeV4Grad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizeAndDequantizeV4Grad" input_arg { name: "gradients" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizeDownAndShrinkRange.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizeDownAndShrinkRange.pbtxt index ea6737ee390894..42783d3a14ebb4 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizeDownAndShrinkRange.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizeDownAndShrinkRange.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizeDownAndShrinkRange" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizeV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizeV2.pbtxt index d164717d5aaf39..37cd1384176d6e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizeV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizeV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizeV2" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizedAdd.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizedAdd.pbtxt index be3f511f03aec3..4532bc23d50a2b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizedAdd.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizedAdd.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizedAdd" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizedAvgPool.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizedAvgPool.pbtxt index 44e51eb39509f7..0ae3390d303c11 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizedAvgPool.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizedAvgPool.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizedAvgPool" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizedBatchNormWithGlobalNormalization.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizedBatchNormWithGlobalNormalization.pbtxt index be4b9356265362..832b8ba577577d 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizedBatchNormWithGlobalNormalization.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizedBatchNormWithGlobalNormalization.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizedBatchNormWithGlobalNormalization" input_arg { name: "t" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizedBiasAdd.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizedBiasAdd.pbtxt index 3c94fb54f0a497..b479c2c54e4d26 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizedBiasAdd.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizedBiasAdd.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizedBiasAdd" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizedConcat.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizedConcat.pbtxt index 2752ebfca09e38..449f588ac8f498 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizedConcat.pbtxt +++ 
b/tensorflow/core/ops/compat/ops_history_v2/QuantizedConcat.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizedConcat" input_arg { name: "concat_dim" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2D.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2D.pbtxt index 3680e575ace2f3..b1cf1c8d334182 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2D.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2D.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizedConv2D" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DAndRelu.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DAndRelu.pbtxt index c9f1696d1ecf67..229e4c436dd622 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DAndRelu.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DAndRelu.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizedConv2DAndRelu" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DAndReluAndRequantize.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DAndReluAndRequantize.pbtxt index 1cbc11d524e167..bc566896f53dfe 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DAndReluAndRequantize.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DAndReluAndRequantize.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizedConv2DAndReluAndRequantize" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DAndRequantize.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DAndRequantize.pbtxt index d3166c425a984f..5d26709f14e31c 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DAndRequantize.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DAndRequantize.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizedConv2DAndRequantize" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DPerChannel.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DPerChannel.pbtxt index 06c3d5f115fab3..93640944477061 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DPerChannel.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DPerChannel.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizedConv2DPerChannel" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DWithBias.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DWithBias.pbtxt index f059cf786d14f0..8372a882260457 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DWithBias.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DWithBias.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizedConv2DWithBias" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DWithBiasAndRelu.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DWithBiasAndRelu.pbtxt index 4eef8cb4c4f3c2..af0ce39a844546 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DWithBiasAndRelu.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DWithBiasAndRelu.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizedConv2DWithBiasAndRelu" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DWithBiasAndReluAndRequantize.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DWithBiasAndReluAndRequantize.pbtxt index 
174a196a036974..599f19e666dcdc 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DWithBiasAndReluAndRequantize.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DWithBiasAndReluAndRequantize.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizedConv2DWithBiasAndReluAndRequantize" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DWithBiasAndRequantize.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DWithBiasAndRequantize.pbtxt index 96ed2852b68046..8cf8fbb2eae658 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DWithBiasAndRequantize.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DWithBiasAndRequantize.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizedConv2DWithBiasAndRequantize" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DWithBiasSignedSumAndReluAndRequantize.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DWithBiasSignedSumAndReluAndRequantize.pbtxt index 37f884f1cb0e88..e46786a9a74a86 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DWithBiasSignedSumAndReluAndRequantize.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DWithBiasSignedSumAndReluAndRequantize.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizedConv2DWithBiasSignedSumAndReluAndRequantize" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DWithBiasSumAndRelu.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DWithBiasSumAndRelu.pbtxt index ed100f68a9b58a..d74439b670ee37 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DWithBiasSumAndRelu.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DWithBiasSumAndRelu.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizedConv2DWithBiasSumAndRelu" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DWithBiasSumAndReluAndRequantize.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DWithBiasSumAndReluAndRequantize.pbtxt index d4a3c49b46c5c7..70c2366a19f2bd 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DWithBiasSumAndReluAndRequantize.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizedConv2DWithBiasSumAndReluAndRequantize.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizedConv2DWithBiasSumAndReluAndRequantize" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizedDepthwiseConv2D.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizedDepthwiseConv2D.pbtxt index 6b4163c799cdfa..f88bba239bf07b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizedDepthwiseConv2D.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizedDepthwiseConv2D.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizedDepthwiseConv2D" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizedDepthwiseConv2DWithBias.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizedDepthwiseConv2DWithBias.pbtxt index c9e584543f3767..4faf839b3b6651 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizedDepthwiseConv2DWithBias.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizedDepthwiseConv2DWithBias.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizedDepthwiseConv2DWithBias" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizedDepthwiseConv2DWithBiasAndRelu.pbtxt 
b/tensorflow/core/ops/compat/ops_history_v2/QuantizedDepthwiseConv2DWithBiasAndRelu.pbtxt index a4bb2fcd372ded..6a4b514e23ffec 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizedDepthwiseConv2DWithBiasAndRelu.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizedDepthwiseConv2DWithBiasAndRelu.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizedDepthwiseConv2DWithBiasAndRelu" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizedDepthwiseConv2DWithBiasAndReluAndRequantize.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizedDepthwiseConv2DWithBiasAndReluAndRequantize.pbtxt index c24ba3165a8aca..3de6c3f9e28f47 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizedDepthwiseConv2DWithBiasAndReluAndRequantize.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizedDepthwiseConv2DWithBiasAndReluAndRequantize.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizedDepthwiseConv2DWithBiasAndReluAndRequantize" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizedInstanceNorm.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizedInstanceNorm.pbtxt index 720358f1bd01d7..98136d82ebda45 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizedInstanceNorm.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizedInstanceNorm.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizedInstanceNorm" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizedMatMul.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizedMatMul.pbtxt index d1332abc5169ca..7e4707a316f80d 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizedMatMul.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizedMatMul.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizedMatMul" input_arg { name: "a" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizedMatMulWithBias.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizedMatMulWithBias.pbtxt index 55c80678a6eb90..a59adb7f78c6d7 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizedMatMulWithBias.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizedMatMulWithBias.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizedMatMulWithBias" input_arg { name: "a" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizedMatMulWithBiasAndDequantize.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizedMatMulWithBiasAndDequantize.pbtxt index 6c6370a1ac0191..04ecfdbd855f83 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizedMatMulWithBiasAndDequantize.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizedMatMulWithBiasAndDequantize.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizedMatMulWithBiasAndDequantize" input_arg { name: "a" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizedMatMulWithBiasAndRelu.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizedMatMulWithBiasAndRelu.pbtxt index ecdeb1d05d1c90..cd0acb9d721657 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizedMatMulWithBiasAndRelu.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizedMatMulWithBiasAndRelu.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizedMatMulWithBiasAndRelu" input_arg { name: "a" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizedMatMulWithBiasAndReluAndRequantize.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizedMatMulWithBiasAndReluAndRequantize.pbtxt index 140ed993453766..b591d3fb37c868 100644 --- 
a/tensorflow/core/ops/compat/ops_history_v2/QuantizedMatMulWithBiasAndReluAndRequantize.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizedMatMulWithBiasAndReluAndRequantize.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizedMatMulWithBiasAndReluAndRequantize" input_arg { name: "a" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizedMatMulWithBiasAndRequantize.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizedMatMulWithBiasAndRequantize.pbtxt index 16b180475e46e2..1aab9762eea036 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizedMatMulWithBiasAndRequantize.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizedMatMulWithBiasAndRequantize.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizedMatMulWithBiasAndRequantize" input_arg { name: "a" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizedMaxPool.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizedMaxPool.pbtxt index 9164c0fcfc6160..47d6ac80518ef9 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizedMaxPool.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizedMaxPool.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizedMaxPool" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizedMul.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizedMul.pbtxt index 97c025672c48b2..795ab1341d9c67 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizedMul.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizedMul.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizedMul" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizedRelu.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizedRelu.pbtxt index 77fb5c2eeb9041..724d8b3946bdb1 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizedRelu.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizedRelu.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizedRelu" input_arg { name: "features" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizedRelu6.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizedRelu6.pbtxt index b76f1159ec4f16..0f389d5eae9337 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizedRelu6.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizedRelu6.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizedRelu6" input_arg { name: "features" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizedReluX.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizedReluX.pbtxt index 772ac59eba26ff..9ee6f0d2e274b9 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizedReluX.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizedReluX.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizedReluX" input_arg { name: "features" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizedReshape.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizedReshape.pbtxt index fa7b1fee5cf5f1..f54db98943cc14 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizedReshape.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QuantizedReshape.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizedReshape" input_arg { name: "tensor" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QuantizedResizeBilinear.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QuantizedResizeBilinear.pbtxt index aca635c5016a1a..bee577ed23b411 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QuantizedResizeBilinear.pbtxt +++ 
b/tensorflow/core/ops/compat/ops_history_v2/QuantizedResizeBilinear.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QuantizedResizeBilinear" input_arg { name: "images" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QueueClose.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QueueClose.pbtxt index 0a4a305cc97568..582eeccd6a263a 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QueueClose.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QueueClose.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QueueClose" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QueueCloseV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QueueCloseV2.pbtxt index ab9c620c7fb7b4..e0544c13e654b4 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QueueCloseV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QueueCloseV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QueueCloseV2" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QueueDequeue.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QueueDequeue.pbtxt index f4a3b6fe195c42..f06745f20fe456 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QueueDequeue.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QueueDequeue.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QueueDequeue" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QueueDequeueMany.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QueueDequeueMany.pbtxt index 986897a5bcd27f..374ecfb18a87c3 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QueueDequeueMany.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QueueDequeueMany.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QueueDequeueMany" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QueueDequeueManyV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QueueDequeueManyV2.pbtxt index 355b7905840281..f3ebc6c7e59288 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QueueDequeueManyV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QueueDequeueManyV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QueueDequeueManyV2" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QueueDequeueUpTo.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QueueDequeueUpTo.pbtxt index 6e006006a784f5..6fa30ac810a0e5 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QueueDequeueUpTo.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QueueDequeueUpTo.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QueueDequeueUpTo" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QueueDequeueUpToV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QueueDequeueUpToV2.pbtxt index 5901d313926bdf..2016cc7f04a0ad 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QueueDequeueUpToV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QueueDequeueUpToV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QueueDequeueUpToV2" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QueueDequeueV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QueueDequeueV2.pbtxt index 44a893ea7c3a00..e338ccbd355a76 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QueueDequeueV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QueueDequeueV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QueueDequeueV2" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QueueEnqueue.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QueueEnqueue.pbtxt index 
689046ebae4e2d..fb94d288f2400e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QueueEnqueue.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QueueEnqueue.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QueueEnqueue" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QueueEnqueueMany.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QueueEnqueueMany.pbtxt index f3cbf429f28639..2d9582430725b9 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QueueEnqueueMany.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QueueEnqueueMany.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QueueEnqueueMany" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QueueEnqueueManyV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QueueEnqueueManyV2.pbtxt index 159b3012075455..c327d27e2f11f6 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QueueEnqueueManyV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QueueEnqueueManyV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QueueEnqueueManyV2" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QueueEnqueueV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QueueEnqueueV2.pbtxt index e6bf061965d84b..da8cdd3cd67c48 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QueueEnqueueV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QueueEnqueueV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QueueEnqueueV2" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QueueIsClosed.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QueueIsClosed.pbtxt index 42426552b51808..11a421b27c85e3 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QueueIsClosed.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QueueIsClosed.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QueueIsClosed" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QueueIsClosedV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QueueIsClosedV2.pbtxt index efe540401c9591..7cf1fde1bcee16 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QueueIsClosedV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QueueIsClosedV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QueueIsClosedV2" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QueueSize.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QueueSize.pbtxt index 4dfe7aff427356..d2a49624f20ded 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QueueSize.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QueueSize.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QueueSize" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/QueueSizeV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/QueueSizeV2.pbtxt index c7e11d7cf326e4..46eb229a3fa7e6 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/QueueSizeV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/QueueSizeV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "QueueSizeV2" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RFFT.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RFFT.pbtxt index 0879b0cfe5f2ae..02456ea217b791 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RFFT.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RFFT.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RFFT" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RFFT2D.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RFFT2D.pbtxt index 
79ed06b41d4dea..f3676f45cde226 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RFFT2D.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RFFT2D.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RFFT2D"
   input_arg {
     name: "input"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RFFT3D.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RFFT3D.pbtxt
index 754d372cf560ce..6475cd47316c19 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RFFT3D.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RFFT3D.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RFFT3D"
   input_arg {
     name: "input"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RFFTND.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RFFTND.pbtxt
index 7f24e19efc06bf..8f75bea6baef82 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RFFTND.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RFFTND.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RFFTND"
   input_arg {
     name: "input"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RGBToHSV.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RGBToHSV.pbtxt
index 22b6995682fb33..9ed50d337d0346 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RGBToHSV.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RGBToHSV.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RGBToHSV"
   input_arg {
     name: "images"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RaggedBincount.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RaggedBincount.pbtxt
index 3fda312a6a4653..4f5fb24109cad4 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RaggedBincount.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RaggedBincount.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RaggedBincount"
   input_arg {
     name: "splits"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RaggedCountSparseOutput.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RaggedCountSparseOutput.pbtxt
index 644b0eca3ad379..aa1a4e07aafaa2 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RaggedCountSparseOutput.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RaggedCountSparseOutput.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RaggedCountSparseOutput"
   input_arg {
     name: "splits"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RaggedCross.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RaggedCross.pbtxt
index 2407315da2ee8a..0e9fe0adcfba2c 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RaggedCross.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RaggedCross.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RaggedCross"
   input_arg {
     name: "ragged_values"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RaggedFillEmptyRows.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RaggedFillEmptyRows.pbtxt
index 54ba7be3cb1fdc..4587abc0f66e9a 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RaggedFillEmptyRows.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RaggedFillEmptyRows.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RaggedFillEmptyRows"
   input_arg {
     name: "value_rowids"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RaggedFillEmptyRowsGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RaggedFillEmptyRowsGrad.pbtxt
index 04f8b3e6f3a821..ea9546d504068f 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RaggedFillEmptyRowsGrad.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RaggedFillEmptyRowsGrad.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RaggedFillEmptyRowsGrad"
   input_arg {
     name: "reverse_index_map"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RaggedGather.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RaggedGather.pbtxt
index 5e0e69e365a36c..afa14e8e8a8b20 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RaggedGather.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RaggedGather.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RaggedGather"
   input_arg {
     name: "params_nested_splits"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RaggedRange.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RaggedRange.pbtxt
index e895110b6372a1..866c9b472d906d 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RaggedRange.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RaggedRange.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RaggedRange"
   input_arg {
     name: "starts"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RaggedTensorFromVariant.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RaggedTensorFromVariant.pbtxt
index 50d8a8dd1b86db..5c750d0ce7e567 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RaggedTensorFromVariant.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RaggedTensorFromVariant.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RaggedTensorFromVariant"
   input_arg {
     name: "encoded_ragged"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RaggedTensorToSparse.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RaggedTensorToSparse.pbtxt
index bcbb0d25bc94ff..f9172b4cf3772a 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RaggedTensorToSparse.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RaggedTensorToSparse.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RaggedTensorToSparse"
   input_arg {
     name: "rt_nested_splits"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RaggedTensorToTensor.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RaggedTensorToTensor.pbtxt
index 8ecd7fb2c86ec3..60fceb565ebc03 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RaggedTensorToTensor.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RaggedTensorToTensor.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RaggedTensorToTensor"
   input_arg {
     name: "shape"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RaggedTensorToVariant.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RaggedTensorToVariant.pbtxt
index 93d6b27fd053a1..f875a2a2ca7fae 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RaggedTensorToVariant.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RaggedTensorToVariant.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RaggedTensorToVariant"
   input_arg {
     name: "rt_nested_splits"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RaggedTensorToVariantGradient.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RaggedTensorToVariantGradient.pbtxt
index fc6ede9bd741bf..45f2fcefe04210 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RaggedTensorToVariantGradient.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RaggedTensorToVariantGradient.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RaggedTensorToVariantGradient"
   input_arg {
     name: "encoded_ragged_grad"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RandomCrop.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RandomCrop.pbtxt
index 9ca7d2f4b5b3bd..a5353cf58d5d41 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RandomCrop.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RandomCrop.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RandomCrop"
   input_arg {
     name: "image"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RandomDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RandomDataset.pbtxt
index 99fb23493250ca..d646d19b2e5ef2 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RandomDataset.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RandomDataset.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RandomDataset"
   input_arg {
     name: "seed"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RandomDatasetV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RandomDatasetV2.pbtxt
index d561045e0b738d..3cf01f7644e9f1 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RandomDatasetV2.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RandomDatasetV2.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RandomDatasetV2"
   input_arg {
     name: "seed"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RandomGamma.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RandomGamma.pbtxt
index 7c655027ce1832..2f38a20f8f0a2e 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RandomGamma.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RandomGamma.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RandomGamma"
   input_arg {
     name: "shape"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RandomGammaGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RandomGammaGrad.pbtxt
index f0c1b5033a5ddd..1e1c0723f6cbfe 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RandomGammaGrad.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RandomGammaGrad.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RandomGammaGrad"
   input_arg {
     name: "alpha"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RandomIndexShuffle.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RandomIndexShuffle.pbtxt
index 06954c6cc639a3..22e6b88af0a921 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RandomIndexShuffle.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RandomIndexShuffle.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RandomIndexShuffle"
   input_arg {
     name: "index"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RandomPoisson.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RandomPoisson.pbtxt
index 33d8920e36dcaa..5499e8d678c590 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RandomPoisson.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RandomPoisson.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RandomPoisson"
   input_arg {
     name: "shape"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RandomPoissonV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RandomPoissonV2.pbtxt
index 1330833972ef23..6c3d9827a35cf8 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RandomPoissonV2.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RandomPoissonV2.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RandomPoissonV2"
   input_arg {
     name: "shape"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RandomShuffle.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RandomShuffle.pbtxt
index 189e7d770a0652..ddd1a8d3f2b2c1 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RandomShuffle.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RandomShuffle.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RandomShuffle"
   input_arg {
     name: "value"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RandomShuffleQueue.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RandomShuffleQueue.pbtxt
index 5793974674e561..550acae8d5be11 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RandomShuffleQueue.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RandomShuffleQueue.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RandomShuffleQueue"
   output_arg {
     name: "handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RandomShuffleQueueV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RandomShuffleQueueV2.pbtxt
index b73d70541fafab..7d9807c4e9564b 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RandomShuffleQueueV2.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RandomShuffleQueueV2.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RandomShuffleQueueV2"
   output_arg {
     name: "handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RandomStandardNormal.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RandomStandardNormal.pbtxt
index c693a9f7ba2e17..71fe5e5ef326b8 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RandomStandardNormal.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RandomStandardNormal.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RandomStandardNormal"
   input_arg {
     name: "shape"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RandomUniform.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RandomUniform.pbtxt
index 63c637f0de1309..449a9ef973929c 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RandomUniform.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RandomUniform.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RandomUniform"
   input_arg {
     name: "shape"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RandomUniformInt.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RandomUniformInt.pbtxt
index 35ad67cb371c9e..3b89715afcad98 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RandomUniformInt.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RandomUniformInt.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RandomUniformInt"
   input_arg {
     name: "shape"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/Range.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Range.pbtxt
index 6c2023eb68e78c..306116f59bfd42 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/Range.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/Range.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "Range"
   input_arg {
     name: "start"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RangeDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RangeDataset.pbtxt
index 91fe787a7be594..9121cf0567e518 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RangeDataset.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RangeDataset.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RangeDataset"
   input_arg {
     name: "start"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/Rank.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Rank.pbtxt
index d44aeb9ec5fe4e..c12fd9a0abf07b 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/Rank.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/Rank.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "Rank"
   input_arg {
     name: "input"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ReadFile.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ReadFile.pbtxt
index 8bf1beeb43a473..ce1985ec3c5cba 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ReadFile.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ReadFile.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ReadFile"
   input_arg {
     name: "filename"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ReadVariableOp.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ReadVariableOp.pbtxt
index 2123cfedd6cb68..5459632d58351e 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ReadVariableOp.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ReadVariableOp.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ReadVariableOp"
   input_arg {
     name: "resource"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ReadVariableXlaSplitND.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ReadVariableXlaSplitND.pbtxt
index be5364ceb1f1b5..f5216da5c96aac 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ReadVariableXlaSplitND.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ReadVariableXlaSplitND.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ReadVariableXlaSplitND"
   input_arg {
     name: "resource"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ReaderNumRecordsProduced.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ReaderNumRecordsProduced.pbtxt
index 4514d3aac61611..50b1ea00da8f29 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ReaderNumRecordsProduced.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ReaderNumRecordsProduced.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ReaderNumRecordsProduced"
   input_arg {
     name: "reader_handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ReaderNumRecordsProducedV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ReaderNumRecordsProducedV2.pbtxt
index bff4305f3bfbb8..f560f01d443b08 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ReaderNumRecordsProducedV2.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ReaderNumRecordsProducedV2.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ReaderNumRecordsProducedV2"
   input_arg {
     name: "reader_handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ReaderNumWorkUnitsCompleted.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ReaderNumWorkUnitsCompleted.pbtxt
index 4ef2c1950f9507..b1e361e0119f22 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ReaderNumWorkUnitsCompleted.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ReaderNumWorkUnitsCompleted.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ReaderNumWorkUnitsCompleted"
   input_arg {
     name: "reader_handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ReaderNumWorkUnitsCompletedV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ReaderNumWorkUnitsCompletedV2.pbtxt
index 8f136052d08810..ee4c93e19bab64 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ReaderNumWorkUnitsCompletedV2.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ReaderNumWorkUnitsCompletedV2.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ReaderNumWorkUnitsCompletedV2"
   input_arg {
     name: "reader_handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ReaderRead.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ReaderRead.pbtxt
index fefc032a3460d9..b2a933892c4226 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ReaderRead.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ReaderRead.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ReaderRead"
   input_arg {
     name: "reader_handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ReaderReadUpTo.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ReaderReadUpTo.pbtxt
index 148095de5cae3a..e3bb64ec391d86 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ReaderReadUpTo.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ReaderReadUpTo.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ReaderReadUpTo"
   input_arg {
     name: "reader_handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ReaderReadUpToV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ReaderReadUpToV2.pbtxt
index f007588ebb608b..2ad62b16c9e928 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ReaderReadUpToV2.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ReaderReadUpToV2.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ReaderReadUpToV2"
   input_arg {
     name: "reader_handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ReaderReadV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ReaderReadV2.pbtxt
index 4e74f1b5b75080..3a1573147dcbe4 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ReaderReadV2.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ReaderReadV2.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ReaderReadV2"
   input_arg {
     name: "reader_handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ReaderReset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ReaderReset.pbtxt
index 4379c400833e0f..9607f83c47016d 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ReaderReset.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ReaderReset.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ReaderReset"
   input_arg {
     name: "reader_handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ReaderResetV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ReaderResetV2.pbtxt
index eecd28bb726e79..56f862ae795184 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ReaderResetV2.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ReaderResetV2.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ReaderResetV2"
   input_arg {
     name: "reader_handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ReaderRestoreState.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ReaderRestoreState.pbtxt
index 1c5f71fe0f3521..717a5c34c8de93 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ReaderRestoreState.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ReaderRestoreState.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ReaderRestoreState"
   input_arg {
     name: "reader_handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ReaderRestoreStateV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ReaderRestoreStateV2.pbtxt
index 1b10902e081b7b..f75b04fc59d4e0 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ReaderRestoreStateV2.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ReaderRestoreStateV2.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ReaderRestoreStateV2"
   input_arg {
     name: "reader_handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ReaderSerializeState.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ReaderSerializeState.pbtxt
index d3a95d06f33347..2f708cb8926695 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ReaderSerializeState.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ReaderSerializeState.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ReaderSerializeState"
   input_arg {
     name: "reader_handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ReaderSerializeStateV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ReaderSerializeStateV2.pbtxt
index 4946352509bcb4..c4ade1409fbacd 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ReaderSerializeStateV2.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ReaderSerializeStateV2.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ReaderSerializeStateV2"
   input_arg {
     name: "reader_handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/Real.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Real.pbtxt
index 0f326a3de147ed..d7e783ebe72f15 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/Real.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/Real.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "Real"
   input_arg {
     name: "input"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RealDiv.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RealDiv.pbtxt
index a5a78f14cc7c5a..6f725e22c64473 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RealDiv.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RealDiv.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RealDiv"
   input_arg {
     name: "x"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RebatchDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RebatchDataset.pbtxt
index bc70fcfadadb8e..176f94a3329c2b 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RebatchDataset.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RebatchDataset.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RebatchDataset"
   input_arg {
     name: "input_dataset"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RebatchDatasetV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RebatchDatasetV2.pbtxt
index fda65d7bfaba18..fd853b65b2685c 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RebatchDatasetV2.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RebatchDatasetV2.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RebatchDatasetV2"
   input_arg {
     name: "input_dataset"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/Reciprocal.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Reciprocal.pbtxt
index 35cab85ee2a898..7e03554871a4eb 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/Reciprocal.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/Reciprocal.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "Reciprocal"
   input_arg {
     name: "x"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ReciprocalGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ReciprocalGrad.pbtxt
index eea1bbe1c8d667..8884c796da5e6e 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ReciprocalGrad.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ReciprocalGrad.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ReciprocalGrad"
   input_arg {
     name: "x"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RecordInput.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RecordInput.pbtxt
index c353a17b318002..a72374420ee28e 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RecordInput.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RecordInput.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RecordInput"
   output_arg {
     name: "records"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/Recv.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Recv.pbtxt
index 87c7eb65ca87e8..e6717e57f8dedf 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/Recv.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/Recv.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "Recv"
   output_arg {
     name: "tensor"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RecvTPUEmbeddingActivations.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RecvTPUEmbeddingActivations.pbtxt
index 4cb9bd42ec045c..0fec828421f91b 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RecvTPUEmbeddingActivations.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RecvTPUEmbeddingActivations.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RecvTPUEmbeddingActivations"
   output_arg {
     name: "outputs"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ReduceDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ReduceDataset.pbtxt
index c98f1045e53af2..a1a17bc20c534d 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ReduceDataset.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ReduceDataset.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ReduceDataset"
   input_arg {
     name: "input_dataset"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ReduceJoin.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ReduceJoin.pbtxt
index 3603cf38c0b098..28880fcfb06a99 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ReduceJoin.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ReduceJoin.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ReduceJoin"
   input_arg {
     name: "inputs"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RefEnter.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RefEnter.pbtxt
index 8c7921571d9d24..9af599d1b338bb 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RefEnter.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RefEnter.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RefEnter"
   input_arg {
     name: "data"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RefExit.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RefExit.pbtxt
index b13adfac00995a..1f9e84e7fade8b 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RefExit.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RefExit.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RefExit"
   input_arg {
     name: "data"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RefIdentity.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RefIdentity.pbtxt
index 4fa1105b6952d0..d2293fdf467a7b 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RefIdentity.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RefIdentity.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RefIdentity"
   input_arg {
     name: "input"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RefMerge.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RefMerge.pbtxt
index 31913cc794708d..fc4794d2f2f9d7 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RefMerge.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RefMerge.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RefMerge"
   input_arg {
     name: "inputs"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RefNextIteration.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RefNextIteration.pbtxt
index 453bce7c335929..d447a3a87b23e4 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RefNextIteration.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RefNextIteration.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RefNextIteration"
   input_arg {
     name: "data"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RefSelect.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RefSelect.pbtxt
index 688a34a5886fd4..aa2645f9ff17a3 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RefSelect.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RefSelect.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RefSelect"
   input_arg {
     name: "index"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RefSwitch.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RefSwitch.pbtxt
index fba45dc4f98e00..6d12be2e5a8fa9 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RefSwitch.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RefSwitch.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RefSwitch"
   input_arg {
     name: "data"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RegexFullMatch.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RegexFullMatch.pbtxt
index 7b337477adb60d..f2c0b7b99f1c71 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RegexFullMatch.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RegexFullMatch.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RegexFullMatch"
   input_arg {
     name: "input"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RegexReplace.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RegexReplace.pbtxt
index d8e7b8c143f4be..591773ce37416d 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RegexReplace.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RegexReplace.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RegexReplace"
   input_arg {
     name: "input"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RegisterDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RegisterDataset.pbtxt
index 3cac6a9d2a0b6d..a144d96163e746 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RegisterDataset.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RegisterDataset.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RegisterDataset"
   input_arg {
     name: "dataset"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RegisterDatasetV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RegisterDatasetV2.pbtxt
index 3031d65c39b18b..1ebced2af8d75f 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RegisterDatasetV2.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RegisterDatasetV2.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RegisterDatasetV2"
   input_arg {
     name: "dataset"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/Relayout.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Relayout.pbtxt
index 118574bf66fa05..da96a32cf86245 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/Relayout.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/Relayout.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "Relayout"
   input_arg {
     name: "input"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RelayoutLike.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RelayoutLike.pbtxt
index df5b2f89a73424..7a5af4f919e1c9 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RelayoutLike.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RelayoutLike.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RelayoutLike"
   input_arg {
     name: "input"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/Relu.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Relu.pbtxt
index 42ae6688c797ef..703fbbeff56f91 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/Relu.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/Relu.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "Relu"
   input_arg {
     name: "features"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/Relu6.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Relu6.pbtxt
index 110c799682733e..311c3297a16411 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/Relu6.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/Relu6.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "Relu6"
   input_arg {
     name: "features"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/Relu6Grad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Relu6Grad.pbtxt
index 5455dc384cca0c..618e13a2297acc 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/Relu6Grad.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/Relu6Grad.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "Relu6Grad"
   input_arg {
     name: "gradients"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ReluGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ReluGrad.pbtxt
index 89c486883f26c7..b14f23bb30bddc 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ReluGrad.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ReluGrad.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ReluGrad"
   input_arg {
     name: "gradients"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RemoteCall.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RemoteCall.pbtxt
index efe22174170af6..c6bc594510340c 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RemoteCall.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RemoteCall.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RemoteCall"
   input_arg {
     name: "target"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RepeatDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RepeatDataset.pbtxt
index f8f2cd8da0f50c..b7914feb4dfbc7 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RepeatDataset.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RepeatDataset.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RepeatDataset"
   input_arg {
     name: "input_dataset"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RequantizationRange.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RequantizationRange.pbtxt
index 9ff7aa997d1a0c..6a489081eda2da 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RequantizationRange.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RequantizationRange.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RequantizationRange"
   input_arg {
     name: "input"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RequantizationRangePerChannel.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RequantizationRangePerChannel.pbtxt
index f0352b164b215b..b621afb7a80b52 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RequantizationRangePerChannel.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RequantizationRangePerChannel.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RequantizationRangePerChannel"
   input_arg {
     name: "input"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/Requantize.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Requantize.pbtxt
index a142594170dc41..c04d32f19aeb16 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/Requantize.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/Requantize.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "Requantize"
   input_arg {
     name: "input"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RequantizePerChannel.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RequantizePerChannel.pbtxt
index 1e5b406fc0fb1e..3ed03fe12de763 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RequantizePerChannel.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RequantizePerChannel.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RequantizePerChannel"
   input_arg {
     name: "input"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/Reshape.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Reshape.pbtxt
index fcbd49a8ce805f..e422ffa2470633 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/Reshape.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/Reshape.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "Reshape"
   input_arg {
     name: "tensor"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResizeArea.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResizeArea.pbtxt
index 315069f44fa499..ec861564fc4cc6 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResizeArea.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResizeArea.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResizeArea"
   input_arg {
     name: "images"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResizeBicubic.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResizeBicubic.pbtxt
index 2de5ed2e280f54..5e479a9432b876 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResizeBicubic.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResizeBicubic.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResizeBicubic"
   input_arg {
     name: "images"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResizeBicubicGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResizeBicubicGrad.pbtxt
index b085c1d6f46f67..6de227d7e0fd8f 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResizeBicubicGrad.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResizeBicubicGrad.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResizeBicubicGrad"
   input_arg {
     name: "grads"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResizeBilinear.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResizeBilinear.pbtxt
index ea43150a420c04..b991a8ad63ddc8 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResizeBilinear.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResizeBilinear.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResizeBilinear"
   input_arg {
     name: "images"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResizeBilinearGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResizeBilinearGrad.pbtxt
index a78a0b699d7072..79d1605fd1c4a8 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResizeBilinearGrad.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResizeBilinearGrad.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResizeBilinearGrad"
   input_arg {
     name: "grads"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResizeNearestNeighbor.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResizeNearestNeighbor.pbtxt
index a61928b06f38ff..18d87c3583c856 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResizeNearestNeighbor.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResizeNearestNeighbor.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResizeNearestNeighbor"
   input_arg {
     name: "images"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResizeNearestNeighborGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResizeNearestNeighborGrad.pbtxt
index 5bf5aa96eccc2b..001525b07337d6 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResizeNearestNeighborGrad.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResizeNearestNeighborGrad.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResizeNearestNeighborGrad"
   input_arg {
     name: "grads"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceAccumulatorApplyGradient.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceAccumulatorApplyGradient.pbtxt
index 05ec4d8a404634..a1e03efd365a02 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceAccumulatorApplyGradient.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceAccumulatorApplyGradient.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceAccumulatorApplyGradient"
   input_arg {
     name: "handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceAccumulatorNumAccumulated.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceAccumulatorNumAccumulated.pbtxt
index 5773bd5cc85b18..398171da210bf4 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceAccumulatorNumAccumulated.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceAccumulatorNumAccumulated.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceAccumulatorNumAccumulated"
   input_arg {
     name: "handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceAccumulatorSetGlobalStep.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceAccumulatorSetGlobalStep.pbtxt
index 902748a29f5836..3e9c5a29882859 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceAccumulatorSetGlobalStep.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceAccumulatorSetGlobalStep.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceAccumulatorSetGlobalStep"
   input_arg {
     name: "handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceAccumulatorTakeGradient.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceAccumulatorTakeGradient.pbtxt
index 5f37567706f7f0..d690fd198e62bd 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceAccumulatorTakeGradient.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceAccumulatorTakeGradient.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceAccumulatorTakeGradient"
   input_arg {
     name: "handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyAdaMax.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyAdaMax.pbtxt
index ef87929c4f5eb6..3f307b70b7cac2 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyAdaMax.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyAdaMax.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceApplyAdaMax"
   input_arg {
     name: "var"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyAdadelta.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyAdadelta.pbtxt
index 2a671e082a687f..f4c58b9860a033 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyAdadelta.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyAdadelta.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceApplyAdadelta"
   input_arg {
     name: "var"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyAdagrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyAdagrad.pbtxt
index 67881c44ac854d..3c8e9d071a98b3 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyAdagrad.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyAdagrad.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceApplyAdagrad"
   input_arg {
     name: "var"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyAdagradDA.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyAdagradDA.pbtxt
index 1bab7daf7aea46..996bc0b1a5cd71 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyAdagradDA.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyAdagradDA.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceApplyAdagradDA"
   input_arg {
     name: "var"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyAdagradV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyAdagradV2.pbtxt
index 66aaa456bedf82..09168ff5c4e03b 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyAdagradV2.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyAdagradV2.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceApplyAdagradV2"
   input_arg {
     name: "var"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyAdam.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyAdam.pbtxt
index e76225de60701e..88cafe521d6800 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyAdam.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyAdam.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceApplyAdam"
   input_arg {
     name: "var"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyAdamWithAmsgrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyAdamWithAmsgrad.pbtxt
index ab8696a3263a89..ccd84e52d5a820 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyAdamWithAmsgrad.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyAdamWithAmsgrad.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceApplyAdamWithAmsgrad"
   input_arg {
     name: "var"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyAddSign.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyAddSign.pbtxt
index 44f5b7d88e0c07..4653229b0aebf1 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyAddSign.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyAddSign.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceApplyAddSign"
   input_arg {
     name: "var"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyCenteredRMSProp.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyCenteredRMSProp.pbtxt
index b7c69b1c832fff..f6fa5b2a351164 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyCenteredRMSProp.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyCenteredRMSProp.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceApplyCenteredRMSProp"
   input_arg {
     name: "var"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyFtrl.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyFtrl.pbtxt
index f94944686d5a14..f917b0fc22fa6f 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyFtrl.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyFtrl.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceApplyFtrl"
   input_arg {
     name: "var"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyFtrlV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyFtrlV2.pbtxt
index 597ce4ab164aa0..d5586b7f73de74 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyFtrlV2.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyFtrlV2.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceApplyFtrlV2"
   input_arg {
     name: "var"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyGradientDescent.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyGradientDescent.pbtxt
index 6bd3170e2c6bde..83639c23ea1d7d 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyGradientDescent.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyGradientDescent.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceApplyGradientDescent"
   input_arg {
     name: "var"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyKerasMomentum.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyKerasMomentum.pbtxt
index 12e22d4167d7dc..737020cd744516 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyKerasMomentum.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyKerasMomentum.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceApplyKerasMomentum"
   input_arg {
     name: "var"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyMomentum.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyMomentum.pbtxt
index 20de47ab2895d0..a6d0dd9243836e 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyMomentum.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyMomentum.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceApplyMomentum"
   input_arg {
     name: "var"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyPowerSign.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyPowerSign.pbtxt
index 261463a5512584..5a7e103c707c04 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyPowerSign.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyPowerSign.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceApplyPowerSign"
   input_arg {
     name: "var"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyProximalAdagrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyProximalAdagrad.pbtxt
index 2c6007597ca3b1..c3adae4e5fc900 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyProximalAdagrad.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyProximalAdagrad.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceApplyProximalAdagrad"
   input_arg {
     name: "var"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyProximalGradientDescent.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyProximalGradientDescent.pbtxt
index dbe02a88ff079d..9c55be37dfc8da 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyProximalGradientDescent.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyProximalGradientDescent.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceApplyProximalGradientDescent"
   input_arg {
     name: "var"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyRMSProp.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyRMSProp.pbtxt
index 90f24a83fb81e7..b0be4acdf19643 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyRMSProp.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceApplyRMSProp.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceApplyRMSProp"
   input_arg {
     name: "var"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceConditionalAccumulator.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceConditionalAccumulator.pbtxt
index 389486faef4e8e..cf3c150ccb9984 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceConditionalAccumulator.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceConditionalAccumulator.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceConditionalAccumulator"
   output_arg {
     name: "handle"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceCountUpTo.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceCountUpTo.pbtxt
index 3f07aa17a613e4..352935c2c167a4 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceCountUpTo.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceCountUpTo.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceCountUpTo"
   input_arg {
     name: "resource"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceGather.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceGather.pbtxt
index 47a841492de645..9aa33d994c6cea 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceGather.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceGather.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceGather"
   input_arg {
     name: "resource"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceGatherNd.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceGatherNd.pbtxt
index f8df557edbbdbb..04794f402cde00 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceGatherNd.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceGatherNd.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceGatherNd"
   input_arg {
     name: "resource"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterAdd.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterAdd.pbtxt
index 5b10cf3f16c0ce..a061da9db360bc 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterAdd.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterAdd.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceScatterAdd"
   input_arg {
     name: "resource"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterDiv.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterDiv.pbtxt
index 0b94ef0dec43bd..d5b683c107ef47 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterDiv.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterDiv.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceScatterDiv"
   input_arg {
     name: "resource"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterMax.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterMax.pbtxt
index d6e97844047d95..d7f1e9c8a37e69 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterMax.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterMax.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceScatterMax"
   input_arg {
     name: "resource"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterMin.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterMin.pbtxt
index d012a861218190..617f12b35c83a7 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterMin.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterMin.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceScatterMin"
   input_arg {
     name: "resource"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterMul.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterMul.pbtxt
index aa859ad4d252fe..313b2ed03c038b 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterMul.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterMul.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceScatterMul"
   input_arg {
     name: "resource"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterNdAdd.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterNdAdd.pbtxt
index 5cc51a5559e4a0..507b30eff9a2d6 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterNdAdd.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterNdAdd.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceScatterNdAdd"
   input_arg {
     name: "ref"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterNdMax.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterNdMax.pbtxt
index 9933b54896edff..8a40ac6e842a03 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterNdMax.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterNdMax.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceScatterNdMax"
   input_arg {
     name: "ref"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterNdMin.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterNdMin.pbtxt
index bd0e7589184074..7567b43d39f025 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterNdMin.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterNdMin.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceScatterNdMin"
   input_arg {
     name: "ref"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterNdSub.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterNdSub.pbtxt
index bd5785e83d4bf8..9d1a74daa931f8 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterNdSub.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterNdSub.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceScatterNdSub"
   input_arg {
     name: "ref"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterNdUpdate.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterNdUpdate.pbtxt
index 0b6ae92e036d84..4305163fc92280 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterNdUpdate.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterNdUpdate.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceScatterNdUpdate"
  input_arg {
     name: "ref"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterSub.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterSub.pbtxt
index 32257603e3aeef..3a267f9f63b289 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterSub.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterSub.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceScatterSub"
   input_arg {
     name: "resource"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterUpdate.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterUpdate.pbtxt
index 320a7b0285e79d..55101b84c33871 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterUpdate.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceScatterUpdate.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceScatterUpdate"
   input_arg {
     name: "resource"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyAdadelta.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyAdadelta.pbtxt
index 21aa0addab9259..562e667d584b04 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyAdadelta.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyAdadelta.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceSparseApplyAdadelta"
   input_arg {
     name: "var"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyAdagrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyAdagrad.pbtxt
index 7cc9e1f5cd2927..4588dc668bd9b4 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyAdagrad.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyAdagrad.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceSparseApplyAdagrad"
   input_arg {
     name: "var"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyAdagradDA.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyAdagradDA.pbtxt
index c28ddc89e06058..e9ef6077dcedf7 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyAdagradDA.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyAdagradDA.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceSparseApplyAdagradDA"
   input_arg {
     name: "var"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyAdagradV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyAdagradV2.pbtxt
index a14ba1dc5d2d55..e0724d11142c5a 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyAdagradV2.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyAdagradV2.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceSparseApplyAdagradV2"
   input_arg {
     name: "var"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyCenteredRMSProp.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyCenteredRMSProp.pbtxt
index 855c9982f3f6f5..6d02f81629efda 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyCenteredRMSProp.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyCenteredRMSProp.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceSparseApplyCenteredRMSProp"
   input_arg {
     name: "var"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyFtrl.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyFtrl.pbtxt
index 7bb28e90106812..8ac38c09e514b4 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyFtrl.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyFtrl.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceSparseApplyFtrl"
   input_arg {
     name: "var"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyFtrlV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyFtrlV2.pbtxt
index c43d4a60f27998..cff68beaf1cbe7 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyFtrlV2.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyFtrlV2.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceSparseApplyFtrlV2"
   input_arg {
     name: "var"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyKerasMomentum.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyKerasMomentum.pbtxt
index 96b22b6cf0bf4a..648d85a4989da1 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyKerasMomentum.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyKerasMomentum.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceSparseApplyKerasMomentum"
   input_arg {
     name: "var"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyMomentum.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyMomentum.pbtxt
index 03b05570cb5ccd..ec3e5a96db592b 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyMomentum.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyMomentum.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceSparseApplyMomentum"
   input_arg {
     name: "var"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyProximalAdagrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyProximalAdagrad.pbtxt
index 0093fb2c50f763..59ae118cd9522d 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyProximalAdagrad.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyProximalAdagrad.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceSparseApplyProximalAdagrad"
   input_arg {
     name: "var"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyProximalGradientDescent.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyProximalGradientDescent.pbtxt
index 0844c8d434921c..246f442d27756e 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyProximalGradientDescent.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyProximalGradientDescent.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceSparseApplyProximalGradientDescent"
   input_arg {
     name: "var"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyRMSProp.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyRMSProp.pbtxt
index 87803f3e89366f..8735a6fcacc18e 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyRMSProp.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceSparseApplyRMSProp.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceSparseApplyRMSProp"
   input_arg {
     name: "var"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ResourceStridedSliceAssign.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ResourceStridedSliceAssign.pbtxt
index 430195fab5c0d0..867f205958e76c 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ResourceStridedSliceAssign.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ResourceStridedSliceAssign.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ResourceStridedSliceAssign"
   input_arg {
     name: "ref"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/Restore.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Restore.pbtxt
index 269b27c0b71f59..1db029076032ac 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/Restore.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/Restore.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "Restore"
   input_arg {
     name: "file_pattern"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RestoreSlice.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RestoreSlice.pbtxt
index 74303c89f05152..03d2aa3bbf56ff 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RestoreSlice.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RestoreSlice.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RestoreSlice"
   input_arg {
     name: "file_pattern"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RestoreV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RestoreV2.pbtxt
index 99319760e597e7..a88db314bc4906 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RestoreV2.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RestoreV2.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RestoreV2"
   input_arg {
     name: "prefix"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RetrieveAllTPUEmbeddingParameters.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RetrieveAllTPUEmbeddingParameters.pbtxt
index ea305f05a813bb..226bed39734bf1 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RetrieveAllTPUEmbeddingParameters.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RetrieveAllTPUEmbeddingParameters.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RetrieveAllTPUEmbeddingParameters"
   output_arg {
     name: "parameters"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingADAMParameters.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingADAMParameters.pbtxt
index adfc5f50337be2..dfef40371c8098 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingADAMParameters.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingADAMParameters.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RetrieveTPUEmbeddingADAMParameters"
   output_arg {
     name: "parameters"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingAdadeltaParameters.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingAdadeltaParameters.pbtxt
index 3c514f34bde42c..f887714c78b20e 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingAdadeltaParameters.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingAdadeltaParameters.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RetrieveTPUEmbeddingAdadeltaParameters"
   output_arg {
     name: "parameters"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingAdagradMomentumParameters.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingAdagradMomentumParameters.pbtxt
index 747aecc018ccc4..9a84dc7b0cc57f 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingAdagradMomentumParameters.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingAdagradMomentumParameters.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RetrieveTPUEmbeddingAdagradMomentumParameters"
   output_arg {
     name: "parameters"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingAdagradParameters.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingAdagradParameters.pbtxt
index cc9f172411d370..27e1aa441b60ae 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingAdagradParameters.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingAdagradParameters.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RetrieveTPUEmbeddingAdagradParameters"
   output_arg {
     name: "parameters"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingCenteredRMSPropParameters.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingCenteredRMSPropParameters.pbtxt
index 6ce83985ca788f..afdcbd5cb5e7c9 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingCenteredRMSPropParameters.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingCenteredRMSPropParameters.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RetrieveTPUEmbeddingCenteredRMSPropParameters"
   output_arg {
     name: "parameters"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingFTRLParameters.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingFTRLParameters.pbtxt
index d0d5fd74a5f8b1..161fde82db383b 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingFTRLParameters.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingFTRLParameters.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RetrieveTPUEmbeddingFTRLParameters"
   output_arg {
     name: "parameters"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingFrequencyEstimatorParameters.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingFrequencyEstimatorParameters.pbtxt
index 1125ad0e594c68..633bf51cab0d68 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingFrequencyEstimatorParameters.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingFrequencyEstimatorParameters.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RetrieveTPUEmbeddingFrequencyEstimatorParameters"
   output_arg {
     name: "parameters"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingMDLAdagradLightParameters.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingMDLAdagradLightParameters.pbtxt
index c2e4047eacfc4a..bf9d96bf37dea8 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingMDLAdagradLightParameters.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingMDLAdagradLightParameters.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RetrieveTPUEmbeddingMDLAdagradLightParameters"
   output_arg {
     name: "parameters"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingMomentumParameters.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingMomentumParameters.pbtxt
index 6db5898c1e3a9a..3b8f98b755312f 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingMomentumParameters.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingMomentumParameters.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RetrieveTPUEmbeddingMomentumParameters"
   output_arg {
     name: "parameters"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingProximalAdagradParameters.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingProximalAdagradParameters.pbtxt
index 5cf6dbfe8d767a..bd7c8288ff2af4 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingProximalAdagradParameters.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingProximalAdagradParameters.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RetrieveTPUEmbeddingProximalAdagradParameters"
   output_arg {
     name: "parameters"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingProximalYogiParameters.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingProximalYogiParameters.pbtxt
index dedd0ee1826811..a74c0caa0862ad 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingProximalYogiParameters.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingProximalYogiParameters.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RetrieveTPUEmbeddingProximalYogiParameters"
   output_arg {
     name: "parameters"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingRMSPropParameters.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingRMSPropParameters.pbtxt
index d4028e2d164b75..137d3bd497dd76 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingRMSPropParameters.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingRMSPropParameters.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RetrieveTPUEmbeddingRMSPropParameters"
   output_arg {
     name: "parameters"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingStochasticGradientDescentParameters.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingStochasticGradientDescentParameters.pbtxt
index 44b3a038b1ca5e..da65f5ae98e542 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingStochasticGradientDescentParameters.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RetrieveTPUEmbeddingStochasticGradientDescentParameters.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RetrieveTPUEmbeddingStochasticGradientDescentParameters"
   output_arg {
     name: "parameters"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/Reverse.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Reverse.pbtxt
index 9eced34f0a50d9..aecf71f072f0cd 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/Reverse.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/Reverse.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "Reverse"
   input_arg {
     name: "tensor"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ReverseSequence.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ReverseSequence.pbtxt
index c3119a2fe49707..74d3601e1f4aba 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ReverseSequence.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ReverseSequence.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ReverseSequence"
   input_arg {
     name: "input"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ReverseV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ReverseV2.pbtxt
index fc9a980a8712bc..8cbc98e30a5390 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/ReverseV2.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/ReverseV2.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "ReverseV2"
   input_arg {
     name: "tensor"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RewriteDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RewriteDataset.pbtxt
index d831b7cac3f549..3e3e43b46fed65 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RewriteDataset.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RewriteDataset.pbtxt
@@ -1,4 +1,4 @@
-op {
+op {
   name: "RewriteDataset"
   input_arg {
     name: "input_dataset"
diff --git a/tensorflow/core/ops/compat/ops_history_v2/RightShift.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RightShift.pbtxt
index 76bcdc4bd20b38..97257a019725a2 100644
--- a/tensorflow/core/ops/compat/ops_history_v2/RightShift.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history_v2/RightShift.pbtxt
@@ -1,4 +1,4 @@
-op {
+op
{ name: "RightShift" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Rint.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Rint.pbtxt index f128a08a47a130..feed3bca0b3ef1 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Rint.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Rint.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Rint" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscAbs.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscAbs.pbtxt index a1d64124f38587..2b3111bf580739 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscAbs.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscAbs.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscAbs" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscAdd.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscAdd.pbtxt index d98094d312b9ad..86cc81e964c101 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscAdd.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscAdd.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscAdd" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscBinaryArithmetic.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscBinaryArithmetic.pbtxt index f2919e4c26cbf9..9d5f080ed02465 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscBinaryArithmetic.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscBinaryArithmetic.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscBinaryArithmetic" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscBinaryComparison.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscBinaryComparison.pbtxt index 88731716d93946..d131476f8fe944 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscBinaryComparison.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscBinaryComparison.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscBinaryComparison" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscBitcast.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscBitcast.pbtxt index 738659831a958d..1d37369adec753 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscBitcast.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscBitcast.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscBitcast" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscBroadcast.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscBroadcast.pbtxt index 61318f39439e77..e81e8413dfb88b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscBroadcast.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscBroadcast.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscBroadcast" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscCast.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscCast.pbtxt index 2b82bc3ddc9939..344d0496b27962 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscCast.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscCast.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscCast" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscCeil.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscCeil.pbtxt index 904aca9f50b86d..ff1fefc0115839 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscCeil.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscCeil.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscCeil" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscCholesky.pbtxt 
b/tensorflow/core/ops/compat/ops_history_v2/RiscCholesky.pbtxt index 3b8d59b6efac76..c6b24d107be7f3 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscCholesky.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscCholesky.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscCholesky" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscConcat.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscConcat.pbtxt index 0b062957805f5f..889de347165b26 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscConcat.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscConcat.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscConcat" input_arg { name: "values" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscCondition.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscCondition.pbtxt index a348796ece3143..859814c65905b2 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscCondition.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscCondition.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscCondition" input_arg { name: "pred" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscConv.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscConv.pbtxt index 06103b7729dab5..e9d326be8f286f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscConv.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscConv.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscConv" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscCos.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscCos.pbtxt index d89ad75fc9fa25..98cebc12c5a9ee 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscCos.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscCos.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscCos" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscDiv.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscDiv.pbtxt index d2196f83d64b61..98aaf5ae97b8c5 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscDiv.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscDiv.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscDiv" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscDot.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscDot.pbtxt index 55101191acb75f..0b3ff5c11e3646 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscDot.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscDot.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscDot" input_arg { name: "a" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscExp.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscExp.pbtxt index 9775069c7f693e..4386db96ae9960 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscExp.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscExp.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscExp" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscFft.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscFft.pbtxt index de6960750f34ef..605cd7edd6e04e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscFft.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscFft.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscFft" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscFloor.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscFloor.pbtxt index 71b73dca0fe261..55941ca352c2ec 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscFloor.pbtxt +++ 
b/tensorflow/core/ops/compat/ops_history_v2/RiscFloor.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscFloor" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscGather.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscGather.pbtxt index 87681965ec7f3d..18d4ba3fdc90d4 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscGather.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscGather.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscGather" input_arg { name: "params" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscImag.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscImag.pbtxt index c6e17d5296d93e..555ea9d071fe6a 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscImag.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscImag.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscImag" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscIsFinite.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscIsFinite.pbtxt index 1b6574899d378d..19a4ae6617c0cb 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscIsFinite.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscIsFinite.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscIsFinite" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscLog.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscLog.pbtxt index e52bc8fbe8ad02..23bbef3b07a3b0 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscLog.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscLog.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscLog" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscLogicalAnd.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscLogicalAnd.pbtxt index 04777105ffae7f..8bd4410a056174 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscLogicalAnd.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscLogicalAnd.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscLogicalAnd" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscLogicalNot.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscLogicalNot.pbtxt index d743f6c5f935bf..3496ef02e435a8 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscLogicalNot.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscLogicalNot.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscLogicalNot" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscLogicalOr.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscLogicalOr.pbtxt index 5550a34f7d9bc1..3cf31921d5aa0c 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscLogicalOr.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscLogicalOr.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscLogicalOr" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscMax.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscMax.pbtxt index 9ecd924242589f..11c4517d7566b3 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscMax.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscMax.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscMax" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscMin.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscMin.pbtxt index 225a66f5f952de..7ac92ec5ca4a12 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscMin.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscMin.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscMin" input_arg { name: "x" diff --git 
a/tensorflow/core/ops/compat/ops_history_v2/RiscMul.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscMul.pbtxt index 787d58e56f074d..e55fca7a910539 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscMul.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscMul.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscMul" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscNeg.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscNeg.pbtxt index 2d4a2b3425a37c..429d94153c2490 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscNeg.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscNeg.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscNeg" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscPad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscPad.pbtxt index 3707792d018c3c..13ea65b0bd3974 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscPad.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscPad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscPad" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscPool.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscPool.pbtxt index 766557c3b401f4..57847ff00d0cb4 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscPool.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscPool.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscPool" input_arg { name: "value" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscPow.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscPow.pbtxt index 89d58894ce23f3..150c846fb0396f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscPow.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscPow.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscPow" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscRandomUniform.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscRandomUniform.pbtxt index 29261856fe30a6..2d3cd00a70c0ea 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscRandomUniform.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscRandomUniform.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscRandomUniform" input_arg { name: "shape" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscReal.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscReal.pbtxt index 5c299538003771..cf624262303984 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscReal.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscReal.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscReal" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscReduce.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscReduce.pbtxt index fc067edf50fa64..1dff780022c33e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscReduce.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscReduce.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscReduce" input_arg { name: "tensor" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscRem.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscRem.pbtxt index 5392ef47912f15..ffa8ff1bc61e82 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscRem.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscRem.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscRem" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscReshape.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscReshape.pbtxt index a4508f5347d382..f686036497b850 100644 --- 
a/tensorflow/core/ops/compat/ops_history_v2/RiscReshape.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscReshape.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscReshape" input_arg { name: "tensor" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscReverse.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscReverse.pbtxt index 3ee303ebc4ea2c..60dec4d9b9d64b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscReverse.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscReverse.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscReverse" input_arg { name: "tensor" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscScatter.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscScatter.pbtxt index d37788f996fb13..5def9d11dc486b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscScatter.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscScatter.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscScatter" input_arg { name: "indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscShape.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscShape.pbtxt index a9e95b87a8abd1..615e2b7e8bbba9 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscShape.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscShape.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscShape" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscSign.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscSign.pbtxt index c3478d94634773..db7894468abf7b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscSign.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscSign.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscSign" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscSlice.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscSlice.pbtxt index afdc888c886ef2..b09072c5ae7f08 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscSlice.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscSlice.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscSlice" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscSort.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscSort.pbtxt index 60ca56bc40cab6..c49a6951bdbeff 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscSort.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscSort.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscSort" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscSqueeze.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscSqueeze.pbtxt index 0c70b1088f6aba..bf4e7123ddabc0 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscSqueeze.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscSqueeze.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscSqueeze" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscSub.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscSub.pbtxt index 8a83934419e223..2590f9fa34cce6 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscSub.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscSub.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscSub" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscTranspose.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscTranspose.pbtxt index eb7dce89a9aa35..856b0d67ffb98c 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscTranspose.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscTranspose.pbtxt @@ -1,4 +1,4 @@ -op { +op { 
name: "RiscTranspose" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscTriangularSolve.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscTriangularSolve.pbtxt index c2095295f40a00..5b8518fc9bedd7 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscTriangularSolve.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscTriangularSolve.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscTriangularSolve" input_arg { name: "matrix" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscUnary.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscUnary.pbtxt index d825acd378f0b8..0a7af35478abb0 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscUnary.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscUnary.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscUnary" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RiscWhile.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RiscWhile.pbtxt index 9e4695be194b3f..8bb4745a1e135c 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RiscWhile.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RiscWhile.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RiscWhile" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RngReadAndSkip.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RngReadAndSkip.pbtxt index d369d4dc9ae400..e64f5dd6d2680c 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RngReadAndSkip.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RngReadAndSkip.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RngReadAndSkip" input_arg { name: "resource" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RngSkip.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RngSkip.pbtxt index 6ef7c4b9ae69ea..dc3e9b948b7ed6 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RngSkip.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RngSkip.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RngSkip" input_arg { name: "resource" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Roll.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Roll.pbtxt index cfb7c101757c7b..ac81404fece17e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Roll.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Roll.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Roll" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Round.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Round.pbtxt index bb883d57b9a84e..c5685dc61439b1 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Round.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Round.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Round" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Rsqrt.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Rsqrt.pbtxt index 87e8420b1e96f6..6d066c9e00cee9 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Rsqrt.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Rsqrt.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Rsqrt" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/RsqrtGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RsqrtGrad.pbtxt index 9298cb8a73e1dd..4509b1af361e3d 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/RsqrtGrad.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/RsqrtGrad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "RsqrtGrad" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SampleDistortedBoundingBox.pbtxt 
b/tensorflow/core/ops/compat/ops_history_v2/SampleDistortedBoundingBox.pbtxt index 527d0b2f5000c6..95b4a2ddd5f95f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SampleDistortedBoundingBox.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SampleDistortedBoundingBox.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SampleDistortedBoundingBox" input_arg { name: "image_size" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SampleDistortedBoundingBoxV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SampleDistortedBoundingBoxV2.pbtxt index 3600eaf126b48b..d857ee0a68795d 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SampleDistortedBoundingBoxV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SampleDistortedBoundingBoxV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SampleDistortedBoundingBoxV2" input_arg { name: "image_size" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SamplingDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SamplingDataset.pbtxt index 549b71b08c77fd..160a9e9bb16588 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SamplingDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SamplingDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SamplingDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Save.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Save.pbtxt index c815380c490ad8..c632730bd91e7e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Save.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Save.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Save" input_arg { name: "filename" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SaveDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SaveDataset.pbtxt index 8b5b8fcbd1dbc7..18f99fbe3591ee 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SaveDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SaveDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SaveDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SaveDatasetV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SaveDatasetV2.pbtxt index e20303ad3b8d21..9407de7812bb12 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SaveDatasetV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SaveDatasetV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SaveDatasetV2" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SaveSlices.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SaveSlices.pbtxt index 4415c13dbb8f69..306d67bd688456 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SaveSlices.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SaveSlices.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SaveSlices" input_arg { name: "filename" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SaveV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SaveV2.pbtxt index c91f4d3159d0d3..d9bae4c8b8e12b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SaveV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SaveV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SaveV2" input_arg { name: "prefix" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ScalarSummary.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ScalarSummary.pbtxt index 48fa50e9382251..bf4948076abe9c 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ScalarSummary.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ScalarSummary.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ScalarSummary" input_arg { name: "tags" 
diff --git a/tensorflow/core/ops/compat/ops_history_v2/ScaleAndTranslate.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ScaleAndTranslate.pbtxt index 4c3b0727ac80b0..516cca34539b9b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ScaleAndTranslate.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ScaleAndTranslate.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ScaleAndTranslate" input_arg { name: "images" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ScaleAndTranslateGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ScaleAndTranslateGrad.pbtxt index 647f70653585b0..8eaa03c3933199 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ScaleAndTranslateGrad.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ScaleAndTranslateGrad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ScaleAndTranslateGrad" input_arg { name: "grads" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ScanDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ScanDataset.pbtxt index 7563a34c8c7df4..25de8c51a7a388 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ScanDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ScanDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ScanDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ScatterAdd.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ScatterAdd.pbtxt index 70f2fc7f5ae9ae..0e47d683e05717 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ScatterAdd.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ScatterAdd.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ScatterAdd" input_arg { name: "ref" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ScatterDiv.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ScatterDiv.pbtxt index c34776538354d3..2e75f22b0ed101 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ScatterDiv.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ScatterDiv.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ScatterDiv" input_arg { name: "ref" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ScatterMax.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ScatterMax.pbtxt index 7ad8c98ea588fe..fe176e143b1874 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ScatterMax.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ScatterMax.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ScatterMax" input_arg { name: "ref" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ScatterMin.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ScatterMin.pbtxt index e9dc8087a99349..7099d89f366c6a 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ScatterMin.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ScatterMin.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ScatterMin" input_arg { name: "ref" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ScatterMul.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ScatterMul.pbtxt index aa6f863e0bca77..149540b04aecc1 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ScatterMul.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ScatterMul.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ScatterMul" input_arg { name: "ref" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ScatterNd.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ScatterNd.pbtxt index 06877c844cbd08..75749c807b7368 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ScatterNd.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ScatterNd.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ScatterNd" input_arg { name: "indices" diff --git 
a/tensorflow/core/ops/compat/ops_history_v2/ScatterNdAdd.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ScatterNdAdd.pbtxt index 409f7d35e5f1e8..d093276f490609 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ScatterNdAdd.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ScatterNdAdd.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ScatterNdAdd" input_arg { name: "ref" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ScatterNdMax.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ScatterNdMax.pbtxt index 1425d91fc0563a..2af2bf50069c0c 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ScatterNdMax.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ScatterNdMax.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ScatterNdMax" input_arg { name: "ref" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ScatterNdMin.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ScatterNdMin.pbtxt index 996fd4036e1cb5..c2a91a0ed313c8 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ScatterNdMin.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ScatterNdMin.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ScatterNdMin" input_arg { name: "ref" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ScatterNdNonAliasingAdd.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ScatterNdNonAliasingAdd.pbtxt index 5cf7d9f91f111b..eb8f48444b62ed 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ScatterNdNonAliasingAdd.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ScatterNdNonAliasingAdd.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ScatterNdNonAliasingAdd" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ScatterNdSub.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ScatterNdSub.pbtxt index 6e85f0669efa5e..c9c73604d8ad20 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ScatterNdSub.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ScatterNdSub.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ScatterNdSub" input_arg { name: "ref" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ScatterNdUpdate.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ScatterNdUpdate.pbtxt index c3f7a023e18568..73def71d094156 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ScatterNdUpdate.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ScatterNdUpdate.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ScatterNdUpdate" input_arg { name: "ref" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ScatterSub.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ScatterSub.pbtxt index bf168e222834a8..9665d98a94e2d7 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ScatterSub.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ScatterSub.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ScatterSub" input_arg { name: "ref" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ScatterUpdate.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ScatterUpdate.pbtxt index fc91f285899ac2..2f292734f17fb6 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ScatterUpdate.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ScatterUpdate.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ScatterUpdate" input_arg { name: "ref" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SdcaFprint.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SdcaFprint.pbtxt index 53ee0176bcf390..979c0016b3400d 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SdcaFprint.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SdcaFprint.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SdcaFprint" input_arg { name: "input" diff --git 
a/tensorflow/core/ops/compat/ops_history_v2/SdcaOptimizer.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SdcaOptimizer.pbtxt index d13dd131607794..3746f9504ac61c 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SdcaOptimizer.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SdcaOptimizer.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SdcaOptimizer" input_arg { name: "sparse_example_indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SdcaOptimizerV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SdcaOptimizerV2.pbtxt index 2052c0c4a26703..cb16c8f55a1677 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SdcaOptimizerV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SdcaOptimizerV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SdcaOptimizerV2" input_arg { name: "sparse_example_indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SdcaShrinkL1.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SdcaShrinkL1.pbtxt index 8bc28bda66d735..23d9fdd793a2e9 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SdcaShrinkL1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SdcaShrinkL1.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SdcaShrinkL1" input_arg { name: "weights" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SegmentMax.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SegmentMax.pbtxt index c137b5ce06d672..a1d5968f9dd7bc 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SegmentMax.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SegmentMax.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SegmentMax" input_arg { name: "data" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SegmentMaxV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SegmentMaxV2.pbtxt index a2186b6d58c8cb..f61b9101812e8a 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SegmentMaxV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SegmentMaxV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SegmentMaxV2" input_arg { name: "data" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SegmentMean.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SegmentMean.pbtxt index a5a4a423220e35..b3c5ead77d8510 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SegmentMean.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SegmentMean.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SegmentMean" input_arg { name: "data" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SegmentMin.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SegmentMin.pbtxt index feae22be352cfd..bf87e8294e9421 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SegmentMin.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SegmentMin.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SegmentMin" input_arg { name: "data" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SegmentMinV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SegmentMinV2.pbtxt index 16fd710dac8b47..fe2e5396ff0dde 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SegmentMinV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SegmentMinV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SegmentMinV2" input_arg { name: "data" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SegmentProd.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SegmentProd.pbtxt index 4726236438d1a9..8fbc39235bf0f7 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SegmentProd.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SegmentProd.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SegmentProd" input_arg { name: "data" diff --git 
a/tensorflow/core/ops/compat/ops_history_v2/SegmentProdV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SegmentProdV2.pbtxt index a66300ef3e4230..9868f42e95da01 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SegmentProdV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SegmentProdV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SegmentProdV2" input_arg { name: "data" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SegmentSum.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SegmentSum.pbtxt index 9f033b0db192b4..69289d5b7444b8 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SegmentSum.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SegmentSum.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SegmentSum" input_arg { name: "data" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SegmentSumV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SegmentSumV2.pbtxt index f0bc5dce91d3b8..e11efffc4be916 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SegmentSumV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SegmentSumV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SegmentSumV2" input_arg { name: "data" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Select.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Select.pbtxt index c26b378e476155..38d00af197a978 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Select.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Select.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Select" input_arg { name: "condition" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SelectV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SelectV2.pbtxt index 0536e625bc1f15..a7c59f0d2a1778 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SelectV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SelectV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SelectV2" input_arg { name: "condition" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SelfAdjointEig.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SelfAdjointEig.pbtxt index c9bcf0815924a1..3657cc1bdc365d 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SelfAdjointEig.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SelfAdjointEig.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SelfAdjointEig" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SelfAdjointEigV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SelfAdjointEigV2.pbtxt index 2e4b645639d6a6..8fbbfc961d7788 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SelfAdjointEigV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SelfAdjointEigV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SelfAdjointEigV2" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Selu.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Selu.pbtxt index d24219cff310f3..2acf579a5ca9a7 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Selu.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Selu.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Selu" input_arg { name: "features" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SeluGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SeluGrad.pbtxt index d55750d1e75dd1..f96c7cbc158564 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SeluGrad.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SeluGrad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SeluGrad" input_arg { name: "gradients" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Send.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Send.pbtxt index 
c2f8bd93e93c5c..73835060ddbcb2 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Send.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Send.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Send" input_arg { name: "tensor" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SendTPUEmbeddingGradients.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SendTPUEmbeddingGradients.pbtxt index 0794e1c7e5c448..f6c486fe542bef 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SendTPUEmbeddingGradients.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SendTPUEmbeddingGradients.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SendTPUEmbeddingGradients" input_arg { name: "inputs" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SerializeIterator.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SerializeIterator.pbtxt index ba098cea894104..262c9def883fca 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SerializeIterator.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SerializeIterator.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SerializeIterator" input_arg { name: "resource_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SerializeManySparse.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SerializeManySparse.pbtxt index cb0c4c86419eec..9e741634385c16 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SerializeManySparse.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SerializeManySparse.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SerializeManySparse" input_arg { name: "sparse_indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SerializeSparse.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SerializeSparse.pbtxt index b9a18bcf35107c..5040d77fc2f407 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SerializeSparse.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SerializeSparse.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SerializeSparse" input_arg { name: "sparse_indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SerializeTensor.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SerializeTensor.pbtxt index c739dc6a8330f3..4d7b5cf5766745 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SerializeTensor.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SerializeTensor.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SerializeTensor" input_arg { name: "tensor" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SetSize.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SetSize.pbtxt index 38c8b19d19dee3..185e5e09734e96 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SetSize.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SetSize.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SetSize" input_arg { name: "set_indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SetStatsAggregatorDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SetStatsAggregatorDataset.pbtxt index cf7b57075cfa14..fa2dfd389adb8c 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SetStatsAggregatorDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SetStatsAggregatorDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SetStatsAggregatorDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Shape.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Shape.pbtxt index c679caa24aad13..371bb9ef2fde71 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Shape.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Shape.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Shape" input_arg { name: "input" diff 
--git a/tensorflow/core/ops/compat/ops_history_v2/ShapeN.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ShapeN.pbtxt index 04be1dd59c613c..15e9f11c0f8d3b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ShapeN.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ShapeN.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ShapeN" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ShardDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ShardDataset.pbtxt index 2c08879ca68dd0..c23a4d3d2e3f98 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ShardDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ShardDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ShardDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ShardedFilename.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ShardedFilename.pbtxt index df6d834f1f8c1e..cf46ffdbd78d54 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ShardedFilename.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ShardedFilename.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ShardedFilename" input_arg { name: "basename" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ShardedFilespec.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ShardedFilespec.pbtxt index 7c2e0806b5ec58..7d1badcf09e83b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ShardedFilespec.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ShardedFilespec.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ShardedFilespec" input_arg { name: "basename" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ShuffleAndRepeatDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ShuffleAndRepeatDataset.pbtxt index ca111a93648d73..195d66b2ab8956 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ShuffleAndRepeatDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ShuffleAndRepeatDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ShuffleAndRepeatDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ShuffleAndRepeatDatasetV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ShuffleAndRepeatDatasetV2.pbtxt index 2ac2e838c3db86..1d22404cf064e0 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ShuffleAndRepeatDatasetV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ShuffleAndRepeatDatasetV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ShuffleAndRepeatDatasetV2" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ShuffleDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ShuffleDataset.pbtxt index e1b5ff3283e57a..35c0aa70c11696 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ShuffleDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ShuffleDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ShuffleDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ShuffleDatasetV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ShuffleDatasetV2.pbtxt index 5dce75878aa05f..9ec7fa282d6307 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ShuffleDatasetV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ShuffleDatasetV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ShuffleDatasetV2" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ShuffleDatasetV3.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ShuffleDatasetV3.pbtxt index ea30455a9f4acd..e037b818d4ffe1 100644 --- 
a/tensorflow/core/ops/compat/ops_history_v2/ShuffleDatasetV3.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ShuffleDatasetV3.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ShuffleDatasetV3" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ShutdownDistributedTPU.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ShutdownDistributedTPU.pbtxt index 5784b4418c3112..9e60b7f507554b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ShutdownDistributedTPU.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ShutdownDistributedTPU.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ShutdownDistributedTPU" is_stateful: true } diff --git a/tensorflow/core/ops/compat/ops_history_v2/ShutdownTPUSystem.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ShutdownTPUSystem.pbtxt index df9e4f58f57dcf..ddddeddc63a18e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ShutdownTPUSystem.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ShutdownTPUSystem.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ShutdownTPUSystem" output_arg { name: "success" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Sigmoid.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Sigmoid.pbtxt index 16f433a337c378..dee59f6fa02f41 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Sigmoid.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Sigmoid.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Sigmoid" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SigmoidGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SigmoidGrad.pbtxt index 20c59f8f9311e6..788c3385098097 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SigmoidGrad.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SigmoidGrad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SigmoidGrad" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Sign.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Sign.pbtxt index ee388e8ae2a4bf..07cb519996650f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Sign.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Sign.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Sign" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Sin.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Sin.pbtxt index 6a3398c8da6e69..f6122e6e30b1f6 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Sin.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Sin.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Sin" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Sinh.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Sinh.pbtxt index 0ff80863b11fda..7225234c7edcdf 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Sinh.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Sinh.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Sinh" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Size.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Size.pbtxt index 2c5e61f1496822..db039e4254ced1 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Size.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Size.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Size" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SkipDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SkipDataset.pbtxt index e46a083e7dac16..07e0cf257f87ce 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SkipDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SkipDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: 
"SkipDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Skipgram.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Skipgram.pbtxt index 3734477985b239..d31bc826301db8 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Skipgram.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Skipgram.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Skipgram" output_arg { name: "vocab_word" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SleepDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SleepDataset.pbtxt index c9cdd0432da162..0a1d637995e146 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SleepDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SleepDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SleepDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Slice.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Slice.pbtxt index 0d66369807d6ae..ced3fb6e0f0886 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Slice.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Slice.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Slice" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SlidingWindowDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SlidingWindowDataset.pbtxt index d3361f9d45b76e..ab63899bf4fb51 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SlidingWindowDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SlidingWindowDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SlidingWindowDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Snapshot.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Snapshot.pbtxt index c1fb2d69ed9253..aea213f7c50b1f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Snapshot.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Snapshot.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Snapshot" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SnapshotChunkDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SnapshotChunkDataset.pbtxt index 339bcd9980923d..e20fbcefee50e0 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SnapshotChunkDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SnapshotChunkDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SnapshotChunkDataset" input_arg { name: "chunk_file" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SnapshotDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SnapshotDataset.pbtxt index b535c43a80371c..6d9002761ae02f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SnapshotDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SnapshotDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SnapshotDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SnapshotDatasetReader.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SnapshotDatasetReader.pbtxt index 7a9354125604db..d59d8edf2fc492 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SnapshotDatasetReader.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SnapshotDatasetReader.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SnapshotDatasetReader" input_arg { name: "shard_dir" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SnapshotDatasetV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SnapshotDatasetV2.pbtxt index e8636e3537e451..c9e244ed0e9099 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SnapshotDatasetV2.pbtxt +++ 
b/tensorflow/core/ops/compat/ops_history_v2/SnapshotDatasetV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SnapshotDatasetV2" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SnapshotNestedDatasetReader.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SnapshotNestedDatasetReader.pbtxt index 60728b657a6dd4..078460bcb23930 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SnapshotNestedDatasetReader.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SnapshotNestedDatasetReader.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SnapshotNestedDatasetReader" input_arg { name: "inputs" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SobolSample.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SobolSample.pbtxt index 16377290c6194d..4fe7c45282a15f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SobolSample.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SobolSample.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SobolSample" input_arg { name: "dim" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Softmax.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Softmax.pbtxt index 886e1dcb7168c8..03f499777eef54 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Softmax.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Softmax.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Softmax" input_arg { name: "logits" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SoftmaxCrossEntropyWithLogits.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SoftmaxCrossEntropyWithLogits.pbtxt index 4b258297d20a27..8ac8052e30ffac 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SoftmaxCrossEntropyWithLogits.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SoftmaxCrossEntropyWithLogits.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SoftmaxCrossEntropyWithLogits" input_arg { name: "features" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Softplus.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Softplus.pbtxt index 66a82cdfd19bb1..3757e8d75039a8 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Softplus.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Softplus.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Softplus" input_arg { name: "features" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SoftplusGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SoftplusGrad.pbtxt index 20ed580575492b..331b1abbf371f0 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SoftplusGrad.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SoftplusGrad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SoftplusGrad" input_arg { name: "gradients" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Softsign.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Softsign.pbtxt index 99ac45e08ba465..c83bc9929ab275 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Softsign.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Softsign.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Softsign" input_arg { name: "features" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SoftsignGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SoftsignGrad.pbtxt index da12fc0333ef5a..5411f9b5187758 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SoftsignGrad.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SoftsignGrad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SoftsignGrad" input_arg { name: "gradients" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SortListOfSparseCoreCooTensors.pbtxt 
b/tensorflow/core/ops/compat/ops_history_v2/SortListOfSparseCoreCooTensors.pbtxt index 209998f61f0bd8..b86af96551b108 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SortListOfSparseCoreCooTensors.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SortListOfSparseCoreCooTensors.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SortListOfSparseCoreCooTensors" input_arg { name: "row_ids_list" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SpaceToBatch.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SpaceToBatch.pbtxt index 6a3a33227e6b4f..155e1b3a985e44 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SpaceToBatch.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SpaceToBatch.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SpaceToBatch" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SpaceToBatchND.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SpaceToBatchND.pbtxt index 3b0379dbec0ae4..c38026e5cde09c 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SpaceToBatchND.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SpaceToBatchND.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SpaceToBatchND" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SpaceToDepth.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SpaceToDepth.pbtxt index 3abc14c711a049..c7dd03ea1041fc 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SpaceToDepth.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SpaceToDepth.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SpaceToDepth" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseAccumulatorApplyGradient.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseAccumulatorApplyGradient.pbtxt index cbd19b930643ff..7973ae8b558156 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseAccumulatorApplyGradient.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseAccumulatorApplyGradient.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseAccumulatorApplyGradient" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseAccumulatorTakeGradient.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseAccumulatorTakeGradient.pbtxt index 5a35297ec764e3..1aaa40667ff84a 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseAccumulatorTakeGradient.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseAccumulatorTakeGradient.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseAccumulatorTakeGradient" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseAdd.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseAdd.pbtxt index 84b00a504d9f9d..06122b12980987 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseAdd.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseAdd.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseAdd" input_arg { name: "a_indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseAddGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseAddGrad.pbtxt index 96192d0094101c..5c5e9aa0118f89 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseAddGrad.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseAddGrad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseAddGrad" input_arg { name: "backprop_val_grad" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseApplyAdadelta.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseApplyAdadelta.pbtxt index abff5aab28cbc4..5cd7caf5d819d7 100644 --- 
a/tensorflow/core/ops/compat/ops_history_v2/SparseApplyAdadelta.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseApplyAdadelta.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseApplyAdadelta" input_arg { name: "var" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseApplyAdagrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseApplyAdagrad.pbtxt index 248c28c0b8e9e7..3e2e873c9ab59d 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseApplyAdagrad.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseApplyAdagrad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseApplyAdagrad" input_arg { name: "var" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseApplyAdagradDA.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseApplyAdagradDA.pbtxt index 8dcc79dc49aa9c..e47c75ddc6cfa1 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseApplyAdagradDA.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseApplyAdagradDA.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseApplyAdagradDA" input_arg { name: "var" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseApplyAdagradV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseApplyAdagradV2.pbtxt index 00a3ca86f43359..2b5360f68b6ae1 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseApplyAdagradV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseApplyAdagradV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseApplyAdagradV2" input_arg { name: "var" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseApplyCenteredRMSProp.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseApplyCenteredRMSProp.pbtxt index a5ae87d42c445a..8f958ce67f413a 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseApplyCenteredRMSProp.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseApplyCenteredRMSProp.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseApplyCenteredRMSProp" input_arg { name: "var" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseApplyFtrl.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseApplyFtrl.pbtxt index d43fe26ffc1a11..17f289a6bbba7f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseApplyFtrl.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseApplyFtrl.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseApplyFtrl" input_arg { name: "var" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseApplyFtrlV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseApplyFtrlV2.pbtxt index 4ced4fe22d596a..1d7b1cab4169df 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseApplyFtrlV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseApplyFtrlV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseApplyFtrlV2" input_arg { name: "var" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseApplyMomentum.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseApplyMomentum.pbtxt index 17e60ae80df5eb..ffabce2fb7af25 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseApplyMomentum.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseApplyMomentum.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseApplyMomentum" input_arg { name: "var" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseApplyProximalAdagrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseApplyProximalAdagrad.pbtxt index 5fb249592fba4f..c2a7440e3dcacb 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseApplyProximalAdagrad.pbtxt +++ 
b/tensorflow/core/ops/compat/ops_history_v2/SparseApplyProximalAdagrad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseApplyProximalAdagrad" input_arg { name: "var" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseApplyProximalGradientDescent.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseApplyProximalGradientDescent.pbtxt index f04e6553369cc9..525119034a7653 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseApplyProximalGradientDescent.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseApplyProximalGradientDescent.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseApplyProximalGradientDescent" input_arg { name: "var" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseApplyRMSProp.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseApplyRMSProp.pbtxt index aa27af72a44866..f5f7725c0e95e8 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseApplyRMSProp.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseApplyRMSProp.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseApplyRMSProp" input_arg { name: "var" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseBincount.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseBincount.pbtxt index 9739aa7f2588de..9bbc5132845f1f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseBincount.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseBincount.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseBincount" input_arg { name: "indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseConcat.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseConcat.pbtxt index 640957dd1750a5..ac291f4acbacce 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseConcat.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseConcat.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseConcat" input_arg { name: "indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseConditionalAccumulator.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseConditionalAccumulator.pbtxt index 26bc9fa77b61d6..59def3f130ef21 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseConditionalAccumulator.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseConditionalAccumulator.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseConditionalAccumulator" output_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseCountSparseOutput.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseCountSparseOutput.pbtxt index d80e465d0205ba..ed79733f97fcdf 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseCountSparseOutput.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseCountSparseOutput.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseCountSparseOutput" input_arg { name: "indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseCross.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseCross.pbtxt index 81bd4101693f0e..f25372f5808567 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseCross.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseCross.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseCross" input_arg { name: "indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseCrossHashed.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseCrossHashed.pbtxt index b0be6ee5dbcc64..73002a92f24850 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseCrossHashed.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseCrossHashed.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: 
"SparseCrossHashed" input_arg { name: "indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseCrossV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseCrossV2.pbtxt index e2a3e7053512c3..206542e4713902 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseCrossV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseCrossV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseCrossV2" input_arg { name: "indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseDenseCwiseAdd.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseDenseCwiseAdd.pbtxt index 474457187b36bf..ca71405a4c9f61 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseDenseCwiseAdd.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseDenseCwiseAdd.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseDenseCwiseAdd" input_arg { name: "sp_indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseDenseCwiseDiv.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseDenseCwiseDiv.pbtxt index d91c4c89585002..fe4b629ad1cf5b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseDenseCwiseDiv.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseDenseCwiseDiv.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseDenseCwiseDiv" input_arg { name: "sp_indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseDenseCwiseMul.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseDenseCwiseMul.pbtxt index f6fd9e956c0884..80f7245d66d1bb 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseDenseCwiseMul.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseDenseCwiseMul.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseDenseCwiseMul" input_arg { name: "sp_indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseFillEmptyRows.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseFillEmptyRows.pbtxt index a93278ac57811e..d99257aa7103af 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseFillEmptyRows.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseFillEmptyRows.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseFillEmptyRows" input_arg { name: "indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseFillEmptyRowsGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseFillEmptyRowsGrad.pbtxt index af97bf3745301b..87f1c5c4e2d1e3 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseFillEmptyRowsGrad.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseFillEmptyRowsGrad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseFillEmptyRowsGrad" input_arg { name: "reverse_index_map" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseMatMul.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseMatMul.pbtxt index e3a0ab25bb5f8d..d1eaa6a5edcb98 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseMatMul.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseMatMul.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseMatMul" input_arg { name: "a" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixAdd.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixAdd.pbtxt index 1a87d1beedee98..3a9efffdae686c 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixAdd.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixAdd.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseMatrixAdd" input_arg { name: "a" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixMatMul.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixMatMul.pbtxt 
index db0ded9e1f7cae..a38613747717c9 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixMatMul.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixMatMul.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseMatrixMatMul" input_arg { name: "a" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixMul.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixMul.pbtxt index 1ed54fe50f9f58..649992a0298912 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixMul.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixMul.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseMatrixMul" input_arg { name: "a" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixNNZ.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixNNZ.pbtxt index 2073ae629c54cd..40363327c68c83 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixNNZ.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixNNZ.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseMatrixNNZ" input_arg { name: "sparse_matrix" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixOrderingAMD.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixOrderingAMD.pbtxt index cfa3dda19d4b26..b851bde928900d 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixOrderingAMD.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixOrderingAMD.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseMatrixOrderingAMD" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixSoftmax.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixSoftmax.pbtxt index 63579485eae114..d994082a1b3e9e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixSoftmax.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixSoftmax.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseMatrixSoftmax" input_arg { name: "logits" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixSoftmaxGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixSoftmaxGrad.pbtxt index 133ffbbfc1047c..3bb68d7797d134 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixSoftmaxGrad.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixSoftmaxGrad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseMatrixSoftmaxGrad" input_arg { name: "softmax" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixSparseCholesky.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixSparseCholesky.pbtxt index f14d9834fa04ef..1f74136d3a480e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixSparseCholesky.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixSparseCholesky.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseMatrixSparseCholesky" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixSparseMatMul.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixSparseMatMul.pbtxt index 51afd8da1a2ec7..3726d6bd8c0a4d 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixSparseMatMul.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixSparseMatMul.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseMatrixSparseMatMul" input_arg { name: "a" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixTranspose.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixTranspose.pbtxt index ea071da4e77177..be6bcd6dc1e496 100644 --- 
a/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixTranspose.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixTranspose.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseMatrixTranspose" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixZeros.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixZeros.pbtxt index 87b09ffd306f2c..6d743e24a0ad4c 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixZeros.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseMatrixZeros.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseMatrixZeros" input_arg { name: "dense_shape" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseReduceMax.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseReduceMax.pbtxt index 86a4d1e0046906..4df1254af7460d 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseReduceMax.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseReduceMax.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseReduceMax" input_arg { name: "input_indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseReduceMaxSparse.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseReduceMaxSparse.pbtxt index c44461c9d090be..81896440ab75a9 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseReduceMaxSparse.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseReduceMaxSparse.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseReduceMaxSparse" input_arg { name: "input_indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseReduceSum.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseReduceSum.pbtxt index d0e5258dad8801..9f5a2d2e2be133 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseReduceSum.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseReduceSum.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseReduceSum" input_arg { name: "input_indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseReduceSumSparse.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseReduceSumSparse.pbtxt index 12a18fbda1c045..b554e7ca6a00f6 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseReduceSumSparse.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseReduceSumSparse.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseReduceSumSparse" input_arg { name: "input_indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseReorder.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseReorder.pbtxt index 9a9bd24e406c00..5c5ad9078385e6 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseReorder.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseReorder.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseReorder" input_arg { name: "input_indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseReshape.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseReshape.pbtxt index ab9f157b09f23f..934b5010a1e415 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseReshape.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseReshape.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseReshape" input_arg { name: "input_indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentMean.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentMean.pbtxt index 6f56f3f5364548..0891aa6b0175ec 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentMean.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentMean.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: 
"SparseSegmentMean" input_arg { name: "data" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentMeanGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentMeanGrad.pbtxt index a75fd742bf4e91..54e44b3e62d64b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentMeanGrad.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentMeanGrad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseSegmentMeanGrad" input_arg { name: "grad" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentMeanGradV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentMeanGradV2.pbtxt index b02a4838190c33..e1282b858a90ec 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentMeanGradV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentMeanGradV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseSegmentMeanGradV2" input_arg { name: "grad" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentMeanWithNumSegments.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentMeanWithNumSegments.pbtxt index 930da74b96b061..a27a2efc560465 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentMeanWithNumSegments.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentMeanWithNumSegments.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseSegmentMeanWithNumSegments" input_arg { name: "data" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentSqrtN.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentSqrtN.pbtxt index feec752c94d180..b7a209c48d9ee6 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentSqrtN.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentSqrtN.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseSegmentSqrtN" input_arg { name: "data" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentSqrtNGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentSqrtNGrad.pbtxt index 18397fbb9cda45..d9a3c930a122de 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentSqrtNGrad.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentSqrtNGrad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseSegmentSqrtNGrad" input_arg { name: "grad" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentSqrtNGradV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentSqrtNGradV2.pbtxt index 1d8a3b4c921189..9436be8beb8e17 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentSqrtNGradV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentSqrtNGradV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseSegmentSqrtNGradV2" input_arg { name: "grad" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentSqrtNWithNumSegments.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentSqrtNWithNumSegments.pbtxt index 5aa63ca35fcace..b67358819acdc6 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentSqrtNWithNumSegments.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentSqrtNWithNumSegments.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseSegmentSqrtNWithNumSegments" input_arg { name: "data" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentSum.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentSum.pbtxt index db9042011a6f8d..5962058393cf14 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentSum.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentSum.pbtxt @@ -1,4 
+1,4 @@ -op { +op { name: "SparseSegmentSum" input_arg { name: "data" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentSumGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentSumGrad.pbtxt index e441e7f0a60f69..ce9742f898ef6b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentSumGrad.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentSumGrad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseSegmentSumGrad" input_arg { name: "grad" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentSumGradV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentSumGradV2.pbtxt index 4643278f47fad1..3baf45183e9464 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentSumGradV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentSumGradV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseSegmentSumGradV2" input_arg { name: "grad" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentSumWithNumSegments.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentSumWithNumSegments.pbtxt index 55674b28bbae69..4b8183aeeeabcd 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentSumWithNumSegments.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseSegmentSumWithNumSegments.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseSegmentSumWithNumSegments" input_arg { name: "data" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseSlice.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseSlice.pbtxt index 60d914313b3da7..a6434cbfa71d2c 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseSlice.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseSlice.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseSlice" input_arg { name: "indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseSliceGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseSliceGrad.pbtxt index d3d6693044a564..77134f37194b5a 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseSliceGrad.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseSliceGrad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseSliceGrad" input_arg { name: "backprop_val_grad" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseSoftmax.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseSoftmax.pbtxt index 003ea791ed61f0..5e447367f3be6d 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseSoftmax.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseSoftmax.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseSoftmax" input_arg { name: "sp_indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseSoftmaxCrossEntropyWithLogits.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseSoftmaxCrossEntropyWithLogits.pbtxt index 884008f4ea9937..57d8f4c4662535 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseSoftmaxCrossEntropyWithLogits.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseSoftmaxCrossEntropyWithLogits.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseSoftmaxCrossEntropyWithLogits" input_arg { name: "features" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseSparseMaximum.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseSparseMaximum.pbtxt index c1bb9cb237fb35..bdd017c2252867 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseSparseMaximum.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseSparseMaximum.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseSparseMaximum" input_arg { name: 
"a_indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseSparseMinimum.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseSparseMinimum.pbtxt index 7d1569221cce76..b600e95b3ae3bb 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseSparseMinimum.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseSparseMinimum.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseSparseMinimum" input_arg { name: "a_indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseSplit.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseSplit.pbtxt index af8deac1a17908..997b2b21abdd44 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseSplit.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseSplit.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseSplit" input_arg { name: "split_dim" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseTensorDenseAdd.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseTensorDenseAdd.pbtxt index c1b647cef05e96..8e95e67610204f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseTensorDenseAdd.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseTensorDenseAdd.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseTensorDenseAdd" input_arg { name: "a_indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseTensorDenseMatMul.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseTensorDenseMatMul.pbtxt index 8fa6733bfc1b91..ce66c5306a5c2a 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseTensorDenseMatMul.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseTensorDenseMatMul.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseTensorDenseMatMul" input_arg { name: "a_indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseTensorSliceDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseTensorSliceDataset.pbtxt index bc7b81876d1c4e..af26fd8c180a3f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseTensorSliceDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseTensorSliceDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseTensorSliceDataset" input_arg { name: "indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseTensorToCSRSparseMatrix.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseTensorToCSRSparseMatrix.pbtxt index a1215c2000ae1e..b45376fcd00d3c 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseTensorToCSRSparseMatrix.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseTensorToCSRSparseMatrix.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseTensorToCSRSparseMatrix" input_arg { name: "indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseToDense.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseToDense.pbtxt index 7e693267c06d2a..351603424e6ccd 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseToDense.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseToDense.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseToDense" input_arg { name: "sparse_indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SparseToSparseSetOperation.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SparseToSparseSetOperation.pbtxt index bac3763b0510a7..a7775a2f24a465 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SparseToSparseSetOperation.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SparseToSparseSetOperation.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SparseToSparseSetOperation" input_arg { name: "set1_indices" diff --git 
a/tensorflow/core/ops/compat/ops_history_v2/Spence.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Spence.pbtxt index fefe73924fda23..7032cac3dce437 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Spence.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Spence.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Spence" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Split.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Split.pbtxt index 3d69a43f2e71ec..49428f7e5ce590 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Split.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Split.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Split" input_arg { name: "split_dim" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SplitDedupData.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SplitDedupData.pbtxt index 53d3ccdf1c6a6f..ada9d479888933 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SplitDedupData.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SplitDedupData.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SplitDedupData" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SplitV.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SplitV.pbtxt index 577d8a604fac7b..706d1c3c81c729 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SplitV.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SplitV.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SplitV" input_arg { name: "value" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SqlDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SqlDataset.pbtxt index fb233f139edaa7..68af0ac17eff32 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SqlDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SqlDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SqlDataset" input_arg { name: "driver_name" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Sqrt.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Sqrt.pbtxt index 371520ff6ae795..3c566b98b0dbff 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Sqrt.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Sqrt.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Sqrt" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SqrtGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SqrtGrad.pbtxt index 055baa35e98f93..d738e2023ffc24 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SqrtGrad.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SqrtGrad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SqrtGrad" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Square.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Square.pbtxt index 52c0e31486d601..7501f29f1d95e4 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Square.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Square.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Square" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SquaredDifference.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SquaredDifference.pbtxt index a680e0087f531d..29ea33c95e2d2a 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SquaredDifference.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SquaredDifference.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SquaredDifference" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Squeeze.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Squeeze.pbtxt index d3733f99e07910..54335545f4807f 100644 --- 
a/tensorflow/core/ops/compat/ops_history_v2/Squeeze.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Squeeze.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Squeeze" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Stack.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Stack.pbtxt index e4398a4783de8f..e8e459cfe2c08d 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Stack.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Stack.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Stack" output_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StackClose.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StackClose.pbtxt index dd2b8efd9c95ab..8c916ab52a2c69 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StackClose.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StackClose.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StackClose" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StackCloseV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StackCloseV2.pbtxt index 66ddab1d90bed8..18c5934b0d7961 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StackCloseV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StackCloseV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StackCloseV2" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StackPop.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StackPop.pbtxt index 53965ff252d508..80e3ef79d09c61 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StackPop.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StackPop.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StackPop" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StackPopV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StackPopV2.pbtxt index 77300ed42e46f9..438d52b8ea5625 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StackPopV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StackPopV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StackPopV2" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StackPush.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StackPush.pbtxt index f2052fe41c1ab4..44fae0ce455f0c 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StackPush.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StackPush.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StackPush" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StackPushV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StackPushV2.pbtxt index 6b3f8b5655b2a3..7149b4fda435c6 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StackPushV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StackPushV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StackPushV2" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StackV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StackV2.pbtxt index 5d934a7b8bb00b..606361dd26fdb1 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StackV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StackV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StackV2" input_arg { name: "max_size" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Stage.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Stage.pbtxt index d7b0fbc1b9a4b7..8a64d696118f7f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Stage.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Stage.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Stage" input_arg { name: 
"values" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StageClear.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StageClear.pbtxt index 7ce3219c586d58..1f43cdb901967d 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StageClear.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StageClear.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StageClear" attr { name: "capacity" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StagePeek.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StagePeek.pbtxt index e9e6c68ed56344..a7397c488167fe 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StagePeek.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StagePeek.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StagePeek" input_arg { name: "index" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StageSize.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StageSize.pbtxt index b9bb218b6909aa..6f22fd3d032706 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StageSize.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StageSize.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StageSize" output_arg { name: "size" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StatefulPartitionedCall.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StatefulPartitionedCall.pbtxt index a069806185b334..7d411c155ab2b9 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StatefulPartitionedCall.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StatefulPartitionedCall.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StatefulPartitionedCall" input_arg { name: "args" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StatefulRandomBinomial.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StatefulRandomBinomial.pbtxt index 798854feef481b..97eb7d4e8f0fa0 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StatefulRandomBinomial.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StatefulRandomBinomial.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StatefulRandomBinomial" input_arg { name: "resource" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StatefulStandardNormal.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StatefulStandardNormal.pbtxt index 4738b843a0b92a..44ef92c5a8f7ed 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StatefulStandardNormal.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StatefulStandardNormal.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StatefulStandardNormal" input_arg { name: "resource" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StatefulStandardNormalV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StatefulStandardNormalV2.pbtxt index 76e31871a6bbae..1b99b2320b0264 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StatefulStandardNormalV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StatefulStandardNormalV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StatefulStandardNormalV2" input_arg { name: "resource" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StatefulTruncatedNormal.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StatefulTruncatedNormal.pbtxt index 8ef450390d5367..e74de4f0fceb55 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StatefulTruncatedNormal.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StatefulTruncatedNormal.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StatefulTruncatedNormal" input_arg { name: "resource" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StatefulUniform.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StatefulUniform.pbtxt index fef8635b75cb94..fd2b87c6e45988 100644 
--- a/tensorflow/core/ops/compat/ops_history_v2/StatefulUniform.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StatefulUniform.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StatefulUniform" input_arg { name: "resource" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StatefulUniformFullInt.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StatefulUniformFullInt.pbtxt index 0536cdf62ce4cc..35ab70e0f3a1e2 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StatefulUniformFullInt.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StatefulUniformFullInt.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StatefulUniformFullInt" input_arg { name: "resource" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StatefulUniformInt.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StatefulUniformInt.pbtxt index cd71c24e2762c1..06f62faaace7b6 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StatefulUniformInt.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StatefulUniformInt.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StatefulUniformInt" input_arg { name: "resource" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StatelessCase.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StatelessCase.pbtxt index 367c9ddfdb9602..174c00e5c8ab4a 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StatelessCase.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StatelessCase.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StatelessCase" input_arg { name: "branch_index" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StatelessIf.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StatelessIf.pbtxt index a38480703ef14a..6eda6df052a58b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StatelessIf.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StatelessIf.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StatelessIf" input_arg { name: "cond" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StatelessMultinomial.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StatelessMultinomial.pbtxt index 3a06bef6ef7747..16dac7dfd152bb 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StatelessMultinomial.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StatelessMultinomial.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StatelessMultinomial" input_arg { name: "logits" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StatelessParameterizedTruncatedNormal.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StatelessParameterizedTruncatedNormal.pbtxt index 86509705c4f188..598125677b114a 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StatelessParameterizedTruncatedNormal.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StatelessParameterizedTruncatedNormal.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StatelessParameterizedTruncatedNormal" input_arg { name: "shape" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomBinomial.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomBinomial.pbtxt index da4c351d221a53..1ba7e5c119b147 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomBinomial.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomBinomial.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StatelessRandomBinomial" input_arg { name: "shape" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomGammaV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomGammaV2.pbtxt index 56d573cf2ec6d9..b3988b1f407d96 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomGammaV2.pbtxt +++ 
b/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomGammaV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StatelessRandomGammaV2" input_arg { name: "shape" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomGammaV3.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomGammaV3.pbtxt index d2fbd60387e24d..00d9da80c101e1 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomGammaV3.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomGammaV3.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StatelessRandomGammaV3" input_arg { name: "shape" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomGetAlg.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomGetAlg.pbtxt index ff50f6fad96a1c..522fce59c88a05 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomGetAlg.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomGetAlg.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StatelessRandomGetAlg" output_arg { name: "alg" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomGetKeyCounter.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomGetKeyCounter.pbtxt index 7532f9b2f6ffca..0d9c547181bb61 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomGetKeyCounter.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomGetKeyCounter.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StatelessRandomGetKeyCounter" input_arg { name: "seed" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomGetKeyCounterAlg.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomGetKeyCounterAlg.pbtxt index 6ed78cbec62348..4897ee80bb82d5 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomGetKeyCounterAlg.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomGetKeyCounterAlg.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StatelessRandomGetKeyCounterAlg" input_arg { name: "seed" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomNormal.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomNormal.pbtxt index 0de87cb2569ce2..804d904c148234 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomNormal.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomNormal.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StatelessRandomNormal" input_arg { name: "shape" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomNormalV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomNormalV2.pbtxt index e5a48f3116056e..dac945afe5354e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomNormalV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomNormalV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StatelessRandomNormalV2" input_arg { name: "shape" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomPoisson.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomPoisson.pbtxt index 575f54b2e22617..525b933d8b005b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomPoisson.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomPoisson.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StatelessRandomPoisson" input_arg { name: "shape" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomUniform.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomUniform.pbtxt index d27528775e2460..22a5b25466b90f 100644 --- 
a/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomUniform.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomUniform.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StatelessRandomUniform" input_arg { name: "shape" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomUniformFullInt.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomUniformFullInt.pbtxt index ede922b448a4de..a9d652634d7cb7 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomUniformFullInt.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomUniformFullInt.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StatelessRandomUniformFullInt" input_arg { name: "shape" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomUniformFullIntV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomUniformFullIntV2.pbtxt index 5a5bd12f262c5d..d4511c5447bda5 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomUniformFullIntV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomUniformFullIntV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StatelessRandomUniformFullIntV2" input_arg { name: "shape" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomUniformInt.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomUniformInt.pbtxt index 13014cca798e16..834a6fd5ad3983 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomUniformInt.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomUniformInt.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StatelessRandomUniformInt" input_arg { name: "shape" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomUniformIntV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomUniformIntV2.pbtxt index 96bc39656a4b17..be4ed6072852be 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomUniformIntV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomUniformIntV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StatelessRandomUniformIntV2" input_arg { name: "shape" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomUniformV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomUniformV2.pbtxt index 9434be69beff1f..f66ee72bd4af30 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomUniformV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StatelessRandomUniformV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StatelessRandomUniformV2" input_arg { name: "shape" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StatelessSampleDistortedBoundingBox.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StatelessSampleDistortedBoundingBox.pbtxt index 6c322d848ac079..6858a110cf46f5 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StatelessSampleDistortedBoundingBox.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StatelessSampleDistortedBoundingBox.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StatelessSampleDistortedBoundingBox" input_arg { name: "image_size" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StatelessShuffle.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StatelessShuffle.pbtxt index 06b6eb02eac58d..eab3565990135e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StatelessShuffle.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StatelessShuffle.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StatelessShuffle" input_arg { name: "value" diff --git 
a/tensorflow/core/ops/compat/ops_history_v2/StatelessTruncatedNormal.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StatelessTruncatedNormal.pbtxt index a301c48afbe058..c8c8d850341f5f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StatelessTruncatedNormal.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StatelessTruncatedNormal.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StatelessTruncatedNormal" input_arg { name: "shape" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StatelessTruncatedNormalV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StatelessTruncatedNormalV2.pbtxt index f074ea7423b525..23f886f104dd24 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StatelessTruncatedNormalV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StatelessTruncatedNormalV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StatelessTruncatedNormalV2" input_arg { name: "shape" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StatelessWhile.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StatelessWhile.pbtxt index dc0b6353373c65..28579edbde5e67 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StatelessWhile.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StatelessWhile.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StatelessWhile" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StaticRegexFullMatch.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StaticRegexFullMatch.pbtxt index d053ad6e1db6f6..be6078c102232d 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StaticRegexFullMatch.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StaticRegexFullMatch.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StaticRegexFullMatch" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StaticRegexReplace.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StaticRegexReplace.pbtxt index e570f0de09ff55..fe3eb69a1a0044 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StaticRegexReplace.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StaticRegexReplace.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StaticRegexReplace" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StatsAggregatorHandle.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StatsAggregatorHandle.pbtxt index 45080da97423ca..2d55e00492fddc 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StatsAggregatorHandle.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StatsAggregatorHandle.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StatsAggregatorHandle" output_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StatsAggregatorHandleV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StatsAggregatorHandleV2.pbtxt index 0fca6c398316bf..7dc361e958d794 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StatsAggregatorHandleV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StatsAggregatorHandleV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StatsAggregatorHandleV2" output_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StatsAggregatorSetSummaryWriter.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StatsAggregatorSetSummaryWriter.pbtxt index 55d5207beec293..24730ade1494c6 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StatsAggregatorSetSummaryWriter.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StatsAggregatorSetSummaryWriter.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StatsAggregatorSetSummaryWriter" input_arg { name: "stats_aggregator" diff --git 
a/tensorflow/core/ops/compat/ops_history_v2/StatsAggregatorSummary.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StatsAggregatorSummary.pbtxt index 92d1f2ba507923..a0702a11168ff4 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StatsAggregatorSummary.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StatsAggregatorSummary.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StatsAggregatorSummary" input_arg { name: "iterator" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StochasticCastToInt.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StochasticCastToInt.pbtxt index 66896c5d662940..930525b4364525 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StochasticCastToInt.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StochasticCastToInt.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StochasticCastToInt" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StopGradient.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StopGradient.pbtxt index 06500717b6034a..26f7c677ab8e8a 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StopGradient.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StopGradient.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StopGradient" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StoreMinibatchStatisticsInFdo.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StoreMinibatchStatisticsInFdo.pbtxt index 2250ba0eee369c..22766cb4409917 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StoreMinibatchStatisticsInFdo.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StoreMinibatchStatisticsInFdo.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StoreMinibatchStatisticsInFdo" input_arg { name: "program_key" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StridedSlice.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StridedSlice.pbtxt index 60186d9449d593..2c60bcb73c757f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StridedSlice.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StridedSlice.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StridedSlice" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StridedSliceAssign.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StridedSliceAssign.pbtxt index ba3eaed57e270e..8393dc7272c59c 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StridedSliceAssign.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StridedSliceAssign.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StridedSliceAssign" input_arg { name: "ref" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StridedSliceGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StridedSliceGrad.pbtxt index 92bad979c0f579..14f6a464020a5a 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StridedSliceGrad.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StridedSliceGrad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StridedSliceGrad" input_arg { name: "shape" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StringFormat.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StringFormat.pbtxt index 23c48e47c2761d..bea32908608e6f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StringFormat.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StringFormat.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StringFormat" input_arg { name: "inputs" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StringJoin.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StringJoin.pbtxt index a45262ee65ed29..790cb7b5b6c8d6 100644 --- 
a/tensorflow/core/ops/compat/ops_history_v2/StringJoin.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StringJoin.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StringJoin" input_arg { name: "inputs" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StringLength.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StringLength.pbtxt index 169adeecbed33f..5bdf993f907a2a 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StringLength.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StringLength.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StringLength" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StringLower.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StringLower.pbtxt index 5f7dbf3b2bb283..1c886146d0560d 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StringLower.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StringLower.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StringLower" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StringNGrams.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StringNGrams.pbtxt index 4281b96aee5007..025fc052819bf2 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StringNGrams.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StringNGrams.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StringNGrams" input_arg { name: "data" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StringSplit.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StringSplit.pbtxt index 1832d334e79837..35e8594235e170 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StringSplit.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StringSplit.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StringSplit" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StringSplitV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StringSplitV2.pbtxt index 03ba25fd70168b..fbdf8e06f372c1 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StringSplitV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StringSplitV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StringSplitV2" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StringStrip.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StringStrip.pbtxt index 153bd63f4e2313..3fff999e93789b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StringStrip.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StringStrip.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StringStrip" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StringToHashBucket.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StringToHashBucket.pbtxt index 6cbdd4c7bf8c28..7147a40a12d5af 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StringToHashBucket.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StringToHashBucket.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StringToHashBucket" input_arg { name: "string_tensor" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StringToHashBucketFast.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StringToHashBucketFast.pbtxt index a07a00a41eecee..8ef1227faae9b7 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StringToHashBucketFast.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StringToHashBucketFast.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StringToHashBucketFast" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StringToHashBucketStrong.pbtxt 
b/tensorflow/core/ops/compat/ops_history_v2/StringToHashBucketStrong.pbtxt index de121b287b3aa9..2dbd9920711fe4 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StringToHashBucketStrong.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StringToHashBucketStrong.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StringToHashBucketStrong" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/StringToNumber.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StringToNumber.pbtxt index 833aaa61533c1f..fb09d67516e7fd 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StringToNumber.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StringToNumber.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StringToNumber" input_arg { name: "string_tensor" @@ -48,3 +48,31 @@ op { } } } +op { + name: "StringToNumber" + input_arg { + name: "string_tensor" + type: DT_STRING + } + output_arg { + name: "output" + type_attr: "out_type" + } + attr { + name: "out_type" + type: "type" + default_value { + type: DT_FLOAT + } + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT32 + type: DT_UINT64 + } + } + } +} diff --git a/tensorflow/core/ops/compat/ops_history_v2/StringUpper.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StringUpper.pbtxt index 69c6133ad0501b..8df4881554cefe 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/StringUpper.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/StringUpper.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "StringUpper" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Sub.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Sub.pbtxt index 44761cb1a3bf95..4d89817f561874 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Sub.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Sub.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Sub" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Substr.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Substr.pbtxt index ebfc6279bbe635..a5c1d2c0ae0aa8 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Substr.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Substr.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Substr" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Sum.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Sum.pbtxt index 4a71be5f59cfe9..fb7ce8600c4ecd 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Sum.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Sum.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Sum" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SummaryWriter.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SummaryWriter.pbtxt index 199ded68ff3bbf..a6fd9170f2a121 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SummaryWriter.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SummaryWriter.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SummaryWriter" output_arg { name: "writer" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Svd.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Svd.pbtxt index 3750a39144739e..48003906cc05ed 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Svd.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Svd.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Svd" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Switch.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Switch.pbtxt index 2ff607548010d7..0856f3459b3c02 100644 --- 
a/tensorflow/core/ops/compat/ops_history_v2/Switch.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Switch.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Switch" input_arg { name: "data" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SymbolicGradient.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SymbolicGradient.pbtxt index 5193954ea7eebd..aae5457863e176 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SymbolicGradient.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SymbolicGradient.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SymbolicGradient" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/SyncDevice.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SyncDevice.pbtxt index cc6bf6b6f0c0b0..e55c5f4ade1d4b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/SyncDevice.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/SyncDevice.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "SyncDevice" is_stateful: true } diff --git a/tensorflow/core/ops/compat/ops_history_v2/TFRecordDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TFRecordDataset.pbtxt index b68eea8fbd264c..6caa343ebc05b8 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TFRecordDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TFRecordDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TFRecordDataset" input_arg { name: "filenames" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TFRecordDatasetV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TFRecordDatasetV2.pbtxt index af944b699a0e9d..d3b89247493e91 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TFRecordDatasetV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TFRecordDatasetV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TFRecordDatasetV2" input_arg { name: "filenames" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TFRecordReader.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TFRecordReader.pbtxt index 0f223c57ec5ed3..684c21ea45e8f5 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TFRecordReader.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TFRecordReader.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TFRecordReader" output_arg { name: "reader_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TFRecordReaderV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TFRecordReaderV2.pbtxt index 0f9f02ce01d876..bcdb4764d378da 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TFRecordReaderV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TFRecordReaderV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TFRecordReaderV2" output_arg { name: "reader_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TPUAnnotateTensorsWithDynamicShape.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TPUAnnotateTensorsWithDynamicShape.pbtxt index 09d484e3194e76..eb1f07856a9ef6 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TPUAnnotateTensorsWithDynamicShape.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TPUAnnotateTensorsWithDynamicShape.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TPUAnnotateTensorsWithDynamicShape" input_arg { name: "tensors" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TPUCompilationResult.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TPUCompilationResult.pbtxt index bdaf1f9e51a8c9..04a95cc089fd4e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TPUCompilationResult.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TPUCompilationResult.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TPUCompilationResult" output_arg { name: 
"output" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TPUCompile.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TPUCompile.pbtxt index 7328f9214e3be6..be95091c809227 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TPUCompile.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TPUCompile.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TPUCompile" input_arg { name: "dynamic_shapes" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TPUCompileSucceededAssert.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TPUCompileSucceededAssert.pbtxt index a39d0e72843b10..bc1b3c153f10ec 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TPUCompileSucceededAssert.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TPUCompileSucceededAssert.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TPUCompileSucceededAssert" input_arg { name: "compilation_status" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TPUCopyWithDynamicShape.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TPUCopyWithDynamicShape.pbtxt index 8b897ff34cca06..1e8386d91a5760 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TPUCopyWithDynamicShape.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TPUCopyWithDynamicShape.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TPUCopyWithDynamicShape" input_arg { name: "tensors" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TPUEmbeddingActivations.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TPUEmbeddingActivations.pbtxt index 0bd460f3a0bf50..3975077297a6fa 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TPUEmbeddingActivations.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TPUEmbeddingActivations.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TPUEmbeddingActivations" input_arg { name: "embedding_variable" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TPUExecute.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TPUExecute.pbtxt index a231036be50e8b..97a2c2a1f6673d 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TPUExecute.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TPUExecute.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TPUExecute" input_arg { name: "args" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TPUExecuteAndUpdateVariables.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TPUExecuteAndUpdateVariables.pbtxt index 2d41c28beff465..5a611f9202d83d 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TPUExecuteAndUpdateVariables.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TPUExecuteAndUpdateVariables.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TPUExecuteAndUpdateVariables" input_arg { name: "args" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TPUOrdinalSelector.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TPUOrdinalSelector.pbtxt index de910326bd6a79..3fb272504068ce 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TPUOrdinalSelector.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TPUOrdinalSelector.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TPUOrdinalSelector" output_arg { name: "device_ordinals" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TPUPartitionedCall.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TPUPartitionedCall.pbtxt index cc0652a3790b31..1ec9cb3e43400f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TPUPartitionedCall.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TPUPartitionedCall.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TPUPartitionedCall" input_arg { name: "args" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TPUPartitionedInput.pbtxt 
b/tensorflow/core/ops/compat/ops_history_v2/TPUPartitionedInput.pbtxt index b5eca4ed94d216..aab0574d99eb95 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TPUPartitionedInput.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TPUPartitionedInput.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TPUPartitionedInput" input_arg { name: "inputs" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TPUPartitionedInputV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TPUPartitionedInputV2.pbtxt index 52ec50cc1bf7bd..fe0ef345a1c628 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TPUPartitionedInputV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TPUPartitionedInputV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TPUPartitionedInputV2" input_arg { name: "inputs" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TPUPartitionedOutput.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TPUPartitionedOutput.pbtxt index ad5122c3f1b732..38a85e319644ac 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TPUPartitionedOutput.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TPUPartitionedOutput.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TPUPartitionedOutput" input_arg { name: "inputs" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TPUPartitionedOutputV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TPUPartitionedOutputV2.pbtxt index 83b7375fa2f1bf..3e7a6039ad8ca0 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TPUPartitionedOutputV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TPUPartitionedOutputV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TPUPartitionedOutputV2" input_arg { name: "inputs" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TPUReplicateMetadata.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TPUReplicateMetadata.pbtxt index 9ea81b7f929a1f..9742ad0d8d40c9 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TPUReplicateMetadata.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TPUReplicateMetadata.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TPUReplicateMetadata" attr { name: "num_replicas" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TPUReplicatedInput.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TPUReplicatedInput.pbtxt index f5a8ddcf284366..b549b570c13777 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TPUReplicatedInput.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TPUReplicatedInput.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TPUReplicatedInput" input_arg { name: "inputs" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TPUReplicatedOutput.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TPUReplicatedOutput.pbtxt index f7e9600cf4fb99..70b7d0ae71aadc 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TPUReplicatedOutput.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TPUReplicatedOutput.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TPUReplicatedOutput" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TPUReshardVariables.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TPUReshardVariables.pbtxt index 0bc7b4611fc7e8..fecd05d06eb1b9 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TPUReshardVariables.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TPUReshardVariables.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TPUReshardVariables" input_arg { name: "vars" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TPURoundRobin.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TPURoundRobin.pbtxt index 1405bc8cd6b167..7630e0cac746a0 100644 --- 
a/tensorflow/core/ops/compat/ops_history_v2/TPURoundRobin.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TPURoundRobin.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TPURoundRobin" output_arg { name: "device_ordinal" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TakeDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TakeDataset.pbtxt index 4d9da96372ae09..8ced9c67054cc1 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TakeDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TakeDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TakeDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TakeManySparseFromTensorsMap.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TakeManySparseFromTensorsMap.pbtxt index 7b852a26ddec81..0e3ca630eb178e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TakeManySparseFromTensorsMap.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TakeManySparseFromTensorsMap.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TakeManySparseFromTensorsMap" input_arg { name: "sparse_handles" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TakeWhileDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TakeWhileDataset.pbtxt index 7586b070a73be4..bfde2664966ef2 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TakeWhileDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TakeWhileDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TakeWhileDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Tan.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Tan.pbtxt index 45442d6d712d71..a78f07b8d21382 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Tan.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Tan.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Tan" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Tanh.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Tanh.pbtxt index 9b7ccfe0bc9503..1672b0dc825c79 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Tanh.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Tanh.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Tanh" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TanhGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TanhGrad.pbtxt index 41a827121b8c56..67d28f8ad7e0b4 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TanhGrad.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TanhGrad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TanhGrad" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TemporaryVariable.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TemporaryVariable.pbtxt index d5c19d9f1ef34d..191354ec959700 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TemporaryVariable.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TemporaryVariable.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TemporaryVariable" output_arg { name: "ref" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorArray.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorArray.pbtxt index 67aa5f61327ac7..74b1a54976cc8b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorArray.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorArray.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorArray" input_arg { name: "size" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayClose.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayClose.pbtxt index 44b06bcbf10e95..63c0100942005b 100644 --- 
a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayClose.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayClose.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorArrayClose" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayCloseV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayCloseV2.pbtxt index 5298e3d85742b8..b0fb5804f1a2cf 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayCloseV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayCloseV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorArrayCloseV2" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayCloseV3.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayCloseV3.pbtxt index 63af4407f62ae6..c5d1c2b1f244ce 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayCloseV3.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayCloseV3.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorArrayCloseV3" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayConcat.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayConcat.pbtxt index 1111e79677e141..e2c59abd687402 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayConcat.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayConcat.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorArrayConcat" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayConcatV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayConcatV2.pbtxt index b08f04af049c80..72376bd561c910 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayConcatV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayConcatV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorArrayConcatV2" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayConcatV3.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayConcatV3.pbtxt index 70a7939d25c9d2..91e575ca87f6b3 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayConcatV3.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayConcatV3.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorArrayConcatV3" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayGather.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayGather.pbtxt index 30b54dbce7b8b4..a8ded38550bc8b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayGather.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayGather.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorArrayGather" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayGatherV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayGatherV2.pbtxt index 3025ec43a0da9c..f72968388437f6 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayGatherV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayGatherV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorArrayGatherV2" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayGatherV3.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayGatherV3.pbtxt index bf80504e78a1fb..c87538a40d26ab 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayGatherV3.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayGatherV3.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: 
"TensorArrayGatherV3" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayGrad.pbtxt index 326cb594c3b5af..422154510dbcdf 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayGrad.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayGrad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorArrayGrad" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayGradV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayGradV2.pbtxt index 092dd8435055b7..d989c4071435ee 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayGradV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayGradV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorArrayGradV2" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayGradV3.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayGradV3.pbtxt index 749e282ae92a2d..53e20429ec0e6f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayGradV3.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayGradV3.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorArrayGradV3" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayGradWithShape.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayGradWithShape.pbtxt index 100c3e7c78a4ef..1ce739062eb4c2 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayGradWithShape.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayGradWithShape.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorArrayGradWithShape" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayPack.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayPack.pbtxt index 7b4ca8e9f4d39e..f608e453cee31b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayPack.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayPack.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorArrayPack" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayRead.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayRead.pbtxt index c3398036b5e4b5..62660bec758965 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayRead.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayRead.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorArrayRead" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayReadV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayReadV2.pbtxt index 5620c0ade70a6c..cd0a2a32a8c06b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayReadV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayReadV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorArrayReadV2" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayReadV3.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayReadV3.pbtxt index bb53ce3649d56c..59e66fc84d5cdb 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayReadV3.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayReadV3.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorArrayReadV3" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayScatter.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayScatter.pbtxt index 
37c69a9b398e2e..b2017163f5c072 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayScatter.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayScatter.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorArrayScatter" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayScatterV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayScatterV2.pbtxt index 798222d0d00d64..1eacf2d9acfa3c 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayScatterV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayScatterV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorArrayScatterV2" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayScatterV3.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayScatterV3.pbtxt index c9ecffbc9692c1..5053ed60b1130e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayScatterV3.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayScatterV3.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorArrayScatterV3" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorArraySize.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorArraySize.pbtxt index 690f2118b6269f..7f6ce9510a0bff 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorArraySize.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorArraySize.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorArraySize" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorArraySizeV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorArraySizeV2.pbtxt index bf446335854293..8ee9eda30bd21f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorArraySizeV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorArraySizeV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorArraySizeV2" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorArraySizeV3.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorArraySizeV3.pbtxt index c156e6af69ea9b..8932b0dcf2ddf7 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorArraySizeV3.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorArraySizeV3.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorArraySizeV3" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorArraySplit.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorArraySplit.pbtxt index 4a693b29ff9cba..06bf8bfc3595fd 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorArraySplit.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorArraySplit.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorArraySplit" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorArraySplitV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorArraySplitV2.pbtxt index fa9e02adb9326b..b45ea7a6108a66 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorArraySplitV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorArraySplitV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorArraySplitV2" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorArraySplitV3.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorArraySplitV3.pbtxt index 2eed16078de27e..c072c0c65fc008 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorArraySplitV3.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorArraySplitV3.pbtxt @@ -1,4 +1,4 @@ 
-op { +op { name: "TensorArraySplitV3" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayUnpack.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayUnpack.pbtxt index 6fd145fbf55fac..81e5abec891b9b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayUnpack.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayUnpack.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorArrayUnpack" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayV2.pbtxt index 3c01113a63e8c4..1293e1999c2031 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorArrayV2" input_arg { name: "size" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayV3.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayV3.pbtxt index ef70cea090839b..906e407de181e0 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayV3.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayV3.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorArrayV3" input_arg { name: "size" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayWrite.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayWrite.pbtxt index af5ad923f1d8c1..8f1a94c36b3878 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayWrite.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayWrite.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorArrayWrite" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayWriteV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayWriteV2.pbtxt index ba33173b1dc971..fa0c1a679f7dfa 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayWriteV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayWriteV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorArrayWriteV2" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayWriteV3.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayWriteV3.pbtxt index 44670bec744cf5..45327d42b1be1b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorArrayWriteV3.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorArrayWriteV3.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorArrayWriteV3" input_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorDataset.pbtxt index a70bea1c6975d5..9e71deef2c597c 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorDataset" input_arg { name: "components" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorListConcat.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorListConcat.pbtxt index 8b715d4864011e..010be2e120b46e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorListConcat.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorListConcat.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorListConcat" input_arg { name: "input_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorListConcatLists.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorListConcatLists.pbtxt index e532be3918c367..57dd05a90feb7d 100644 --- 
a/tensorflow/core/ops/compat/ops_history_v2/TensorListConcatLists.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorListConcatLists.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorListConcatLists" input_arg { name: "input_a" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorListConcatV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorListConcatV2.pbtxt index 6051430fd552bf..0bb9546d155d29 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorListConcatV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorListConcatV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorListConcatV2" input_arg { name: "input_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorListElementShape.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorListElementShape.pbtxt index e4143fdbe9dd06..26b982f6cfd9c3 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorListElementShape.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorListElementShape.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorListElementShape" input_arg { name: "input_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorListFromTensor.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorListFromTensor.pbtxt index f3111b9756db4e..c2be2938c8607e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorListFromTensor.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorListFromTensor.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorListFromTensor" input_arg { name: "tensor" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorListGather.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorListGather.pbtxt index 271fc2db2c0161..43b4773a4f7e0f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorListGather.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorListGather.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorListGather" input_arg { name: "input_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorListGetItem.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorListGetItem.pbtxt index ff087f92a0721a..fa124bc94971fe 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorListGetItem.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorListGetItem.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorListGetItem" input_arg { name: "input_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorListLength.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorListLength.pbtxt index 8b95320f200740..b4ea660dca13de 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorListLength.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorListLength.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorListLength" input_arg { name: "input_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorListPopBack.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorListPopBack.pbtxt index 61dd1111cb64f5..35aa68e07584cc 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorListPopBack.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorListPopBack.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorListPopBack" input_arg { name: "input_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorListPushBack.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorListPushBack.pbtxt index 1ae20365fb9bab..8175cfe350dab0 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorListPushBack.pbtxt +++ 
b/tensorflow/core/ops/compat/ops_history_v2/TensorListPushBack.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorListPushBack" input_arg { name: "input_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorListPushBackBatch.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorListPushBackBatch.pbtxt index 6e805caed1bcc2..29b878e527a4d5 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorListPushBackBatch.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorListPushBackBatch.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorListPushBackBatch" input_arg { name: "input_handles" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorListReserve.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorListReserve.pbtxt index bef49b63477ebe..98ade8cb4a9fd0 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorListReserve.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorListReserve.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorListReserve" input_arg { name: "element_shape" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorListResize.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorListResize.pbtxt index 50e070018e2a0b..b322d89d47aa68 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorListResize.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorListResize.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorListResize" input_arg { name: "input_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorListScatter.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorListScatter.pbtxt index 31fa2452bb11d9..daa2f4130ab06b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorListScatter.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorListScatter.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorListScatter" input_arg { name: "tensor" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorListScatterIntoExistingList.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorListScatterIntoExistingList.pbtxt index 311157650998f6..4427bab8a358c2 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorListScatterIntoExistingList.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorListScatterIntoExistingList.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorListScatterIntoExistingList" input_arg { name: "input_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorListScatterV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorListScatterV2.pbtxt index 29cf4aba78680f..de588984614839 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorListScatterV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorListScatterV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorListScatterV2" input_arg { name: "tensor" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorListSetItem.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorListSetItem.pbtxt index d16b98d59bdc54..e2510ca98e0b8f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorListSetItem.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorListSetItem.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorListSetItem" input_arg { name: "input_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorListSplit.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorListSplit.pbtxt index 2ed8f207fed4c3..ff83247addf89b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorListSplit.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorListSplit.pbtxt @@ -1,4 
+1,4 @@ -op { +op { name: "TensorListSplit" input_arg { name: "tensor" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorListStack.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorListStack.pbtxt index 8623e34a934845..5a8e7bcd81b9ef 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorListStack.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorListStack.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorListStack" input_arg { name: "input_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorMapErase.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorMapErase.pbtxt index 009fd49ee6288a..854e7311eab331 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorMapErase.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorMapErase.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorMapErase" input_arg { name: "input_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorMapHasKey.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorMapHasKey.pbtxt index e2856e3ecaea56..a095c36d7c26f5 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorMapHasKey.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorMapHasKey.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorMapHasKey" input_arg { name: "input_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorMapInsert.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorMapInsert.pbtxt index 492d3538de0ed5..10061ea1cde6dc 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorMapInsert.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorMapInsert.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorMapInsert" input_arg { name: "input_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorMapLookup.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorMapLookup.pbtxt index 9f1097226fc7b3..b48fda8ac4623f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorMapLookup.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorMapLookup.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorMapLookup" input_arg { name: "input_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorMapSize.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorMapSize.pbtxt index a94f2c515ece36..dd8ade84414f56 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorMapSize.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorMapSize.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorMapSize" input_arg { name: "input_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorMapStackKeys.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorMapStackKeys.pbtxt index 10aef43aca1343..c3befaa320a385 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorMapStackKeys.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorMapStackKeys.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorMapStackKeys" input_arg { name: "input_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorScatterAdd.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorScatterAdd.pbtxt index 71f0e9fb625334..5fb5b8cb0dd693 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorScatterAdd.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorScatterAdd.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorScatterAdd" input_arg { name: "tensor" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorScatterMax.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorScatterMax.pbtxt index 3abba955f62158..84a05c9bd5dba9 100644 
--- a/tensorflow/core/ops/compat/ops_history_v2/TensorScatterMax.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorScatterMax.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorScatterMax" input_arg { name: "tensor" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorScatterMin.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorScatterMin.pbtxt index 047619805f2aff..d1ae6117921ee0 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorScatterMin.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorScatterMin.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorScatterMin" input_arg { name: "tensor" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorScatterSub.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorScatterSub.pbtxt index d66d823ef317c8..81920523ae9bbc 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorScatterSub.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorScatterSub.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorScatterSub" input_arg { name: "tensor" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorScatterUpdate.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorScatterUpdate.pbtxt index 4bc0747881f943..1e8281cc247958 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorScatterUpdate.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorScatterUpdate.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorScatterUpdate" input_arg { name: "tensor" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorSliceDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorSliceDataset.pbtxt index 7be6e03ca4a19c..3810a6b4023657 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorSliceDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorSliceDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorSliceDataset" input_arg { name: "components" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorStridedSliceUpdate.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorStridedSliceUpdate.pbtxt index 67dae5ab4dd346..3854eeed137057 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorStridedSliceUpdate.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorStridedSliceUpdate.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorStridedSliceUpdate" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorSummary.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorSummary.pbtxt index 5486c2ef84aaed..bf4114aeef398c 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorSummary.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorSummary.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorSummary" input_arg { name: "tensor" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorSummaryV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorSummaryV2.pbtxt index 107c53939f3b29..39092b078161f0 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TensorSummaryV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorSummaryV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TensorSummaryV2" input_arg { name: "tag" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TextLineDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TextLineDataset.pbtxt index 5f539c525b0048..c51a34124f2d1e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TextLineDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TextLineDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TextLineDataset" input_arg { name: "filenames" 
diff --git a/tensorflow/core/ops/compat/ops_history_v2/TextLineReader.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TextLineReader.pbtxt index 2e0924bb51786e..baf1ef10d91047 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TextLineReader.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TextLineReader.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TextLineReader" output_arg { name: "reader_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TextLineReaderV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TextLineReaderV2.pbtxt index d30d08b5fbf150..c669951acdf6b6 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TextLineReaderV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TextLineReaderV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TextLineReaderV2" output_arg { name: "reader_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ThreadPoolDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ThreadPoolDataset.pbtxt index b8c817fc13876a..8e185af579f2be 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ThreadPoolDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ThreadPoolDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ThreadPoolDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ThreadPoolHandle.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ThreadPoolHandle.pbtxt index 4fac8fb83b79f1..e2518b1439d732 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ThreadPoolHandle.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ThreadPoolHandle.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ThreadPoolHandle" output_arg { name: "handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ThreadUnsafeUnigramCandidateSampler.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ThreadUnsafeUnigramCandidateSampler.pbtxt index 0e99e93edcc732..89106aab220583 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ThreadUnsafeUnigramCandidateSampler.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ThreadUnsafeUnigramCandidateSampler.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ThreadUnsafeUnigramCandidateSampler" input_arg { name: "true_classes" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Tile.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Tile.pbtxt index f3a139e2265abd..67de1e5201698c 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Tile.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Tile.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Tile" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TileGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TileGrad.pbtxt index f068a74b1c3d3d..f710e1c470f254 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TileGrad.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TileGrad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TileGrad" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Timestamp.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Timestamp.pbtxt index 18f5f4d2d23f03..6e51504d17653e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Timestamp.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Timestamp.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Timestamp" output_arg { name: "ts" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ToBool.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ToBool.pbtxt index 2903fe7f0b2e7f..6e02fdbb52cdeb 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ToBool.pbtxt +++ 
b/tensorflow/core/ops/compat/ops_history_v2/ToBool.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ToBool" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TopK.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TopK.pbtxt index 8ad9a3c23bb9c7..71c98b7fd8120e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TopK.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TopK.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TopK" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TopKUnique.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TopKUnique.pbtxt index 23b15af2aef16e..12463385bcc816 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TopKUnique.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TopKUnique.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TopKUnique" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TopKV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TopKV2.pbtxt index 5da8c479b9a8b3..22908f661e62f9 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TopKV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TopKV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TopKV2" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TopKWithUnique.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TopKWithUnique.pbtxt index cfb4e9da928507..5e3216fa554877 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TopKWithUnique.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TopKWithUnique.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TopKWithUnique" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TpuHandleToProtoKey.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TpuHandleToProtoKey.pbtxt index 3d2c41d36b4d15..1f8d36887b9733 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TpuHandleToProtoKey.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TpuHandleToProtoKey.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TpuHandleToProtoKey" input_arg { name: "uid" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Transpose.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Transpose.pbtxt index e83ec349f19b72..fa4fb6d58937c4 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Transpose.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Transpose.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Transpose" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TridiagonalMatMul.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TridiagonalMatMul.pbtxt index 98f70443f67a82..117d68b48d29b6 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TridiagonalMatMul.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TridiagonalMatMul.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TridiagonalMatMul" input_arg { name: "superdiag" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TridiagonalSolve.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TridiagonalSolve.pbtxt index f86be1a7508c37..d824c1cb76c35e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TridiagonalSolve.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TridiagonalSolve.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TridiagonalSolve" input_arg { name: "diagonals" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TruncateDiv.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TruncateDiv.pbtxt index 74a4db11dc5efc..0b6c414f616163 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TruncateDiv.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TruncateDiv.pbtxt 
@@ -1,4 +1,4 @@ -op { +op { name: "TruncateDiv" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TruncateMod.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TruncateMod.pbtxt index 72517db5294d89..70ce81b35c4209 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TruncateMod.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TruncateMod.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TruncateMod" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/TruncatedNormal.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TruncatedNormal.pbtxt index c066b3283ec0ad..018d657985d5e8 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/TruncatedNormal.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/TruncatedNormal.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "TruncatedNormal" input_arg { name: "shape" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Unbatch.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Unbatch.pbtxt index 624522ee54ced0..3934b1823ff052 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Unbatch.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Unbatch.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Unbatch" input_arg { name: "batched_tensor" diff --git a/tensorflow/core/ops/compat/ops_history_v2/UnbatchDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/UnbatchDataset.pbtxt index fa3075b92f41ee..84479c117206d7 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/UnbatchDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/UnbatchDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "UnbatchDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/UnbatchGrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/UnbatchGrad.pbtxt index f2619637143ee8..97240f0be53a0f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/UnbatchGrad.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/UnbatchGrad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "UnbatchGrad" input_arg { name: "original_input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/UncompressElement.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/UncompressElement.pbtxt index 04674945d82c97..68406e0e4bc755 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/UncompressElement.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/UncompressElement.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "UncompressElement" input_arg { name: "compressed" diff --git a/tensorflow/core/ops/compat/ops_history_v2/UnicodeDecode.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/UnicodeDecode.pbtxt index a8aac23ae733e7..fa036b31ef6c38 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/UnicodeDecode.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/UnicodeDecode.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "UnicodeDecode" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/UnicodeDecodeWithOffsets.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/UnicodeDecodeWithOffsets.pbtxt index 05a35cc1ad2c7f..29d274738da829 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/UnicodeDecodeWithOffsets.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/UnicodeDecodeWithOffsets.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "UnicodeDecodeWithOffsets" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/UnicodeEncode.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/UnicodeEncode.pbtxt index de0b916b6ab2a2..31a7a5b838820d 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/UnicodeEncode.pbtxt +++ 
b/tensorflow/core/ops/compat/ops_history_v2/UnicodeEncode.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "UnicodeEncode" input_arg { name: "input_values" diff --git a/tensorflow/core/ops/compat/ops_history_v2/UnicodeScript.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/UnicodeScript.pbtxt index b9d7590e7c2ff2..60877b544480fe 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/UnicodeScript.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/UnicodeScript.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "UnicodeScript" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/UnicodeTranscode.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/UnicodeTranscode.pbtxt index 494d7a9d6d8373..5cab73782ce8ec 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/UnicodeTranscode.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/UnicodeTranscode.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "UnicodeTranscode" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/UniformCandidateSampler.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/UniformCandidateSampler.pbtxt index affff4ad02d2dc..bea963f908ee14 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/UniformCandidateSampler.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/UniformCandidateSampler.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "UniformCandidateSampler" input_arg { name: "true_classes" diff --git a/tensorflow/core/ops/compat/ops_history_v2/UniformDequantize.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/UniformDequantize.pbtxt index 68557e82c563e1..7653370635d18c 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/UniformDequantize.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/UniformDequantize.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "UniformDequantize" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/UniformQuantize.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/UniformQuantize.pbtxt index 3e7d42c86198c8..900da8e5be53c3 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/UniformQuantize.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/UniformQuantize.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "UniformQuantize" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/UniformQuantizedAdd.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/UniformQuantizedAdd.pbtxt index 480e70a6693739..1adac5ae59e790 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/UniformQuantizedAdd.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/UniformQuantizedAdd.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "UniformQuantizedAdd" input_arg { name: "lhs" diff --git a/tensorflow/core/ops/compat/ops_history_v2/UniformQuantizedClipByValue.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/UniformQuantizedClipByValue.pbtxt index a9d0a79fded26e..8ab01d6c42d9a6 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/UniformQuantizedClipByValue.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/UniformQuantizedClipByValue.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "UniformQuantizedClipByValue" input_arg { name: "operand" diff --git a/tensorflow/core/ops/compat/ops_history_v2/UniformQuantizedConvolution.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/UniformQuantizedConvolution.pbtxt index 68c4746c5210ae..4ebbe6c80690e8 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/UniformQuantizedConvolution.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/UniformQuantizedConvolution.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "UniformQuantizedConvolution" 
input_arg { name: "lhs" diff --git a/tensorflow/core/ops/compat/ops_history_v2/UniformQuantizedConvolutionHybrid.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/UniformQuantizedConvolutionHybrid.pbtxt index 9a9f8fc085fccb..23096f391d9b6e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/UniformQuantizedConvolutionHybrid.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/UniformQuantizedConvolutionHybrid.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "UniformQuantizedConvolutionHybrid" input_arg { name: "lhs" diff --git a/tensorflow/core/ops/compat/ops_history_v2/UniformQuantizedDot.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/UniformQuantizedDot.pbtxt index 9b3030bb3bd8a6..159dabb9798621 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/UniformQuantizedDot.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/UniformQuantizedDot.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "UniformQuantizedDot" input_arg { name: "lhs" diff --git a/tensorflow/core/ops/compat/ops_history_v2/UniformQuantizedDotHybrid.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/UniformQuantizedDotHybrid.pbtxt index 38c8ed55f5978e..2cabf91667e386 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/UniformQuantizedDotHybrid.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/UniformQuantizedDotHybrid.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "UniformQuantizedDotHybrid" input_arg { name: "lhs" diff --git a/tensorflow/core/ops/compat/ops_history_v2/UniformRequantize.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/UniformRequantize.pbtxt index bfc3d47945b686..af2d7387c5d695 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/UniformRequantize.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/UniformRequantize.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "UniformRequantize" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Unique.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Unique.pbtxt index 9d91d9ef4c6164..be389ba1482be8 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Unique.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Unique.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Unique" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/UniqueDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/UniqueDataset.pbtxt index 58fc7e07a35822..281ba7c1bd0619 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/UniqueDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/UniqueDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "UniqueDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/UniqueV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/UniqueV2.pbtxt index e76969551065c0..83113e14232fb2 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/UniqueV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/UniqueV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "UniqueV2" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/UniqueWithCounts.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/UniqueWithCounts.pbtxt index 4d5682561cf926..c386059943a70e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/UniqueWithCounts.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/UniqueWithCounts.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "UniqueWithCounts" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/UniqueWithCountsV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/UniqueWithCountsV2.pbtxt index d54bba74d97327..85a12b70007320 100644 --- 
a/tensorflow/core/ops/compat/ops_history_v2/UniqueWithCountsV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/UniqueWithCountsV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "UniqueWithCountsV2" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Unpack.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Unpack.pbtxt index 6f24385e25bce6..cc5fd918d4c694 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Unpack.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Unpack.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Unpack" input_arg { name: "value" diff --git a/tensorflow/core/ops/compat/ops_history_v2/UnravelIndex.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/UnravelIndex.pbtxt index 36e66f2d2b0f4b..df2c2bc8469451 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/UnravelIndex.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/UnravelIndex.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "UnravelIndex" input_arg { name: "indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/UnsortedSegmentJoin.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/UnsortedSegmentJoin.pbtxt index 8c95acc7ab9dba..dcbb91bc2f13c4 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/UnsortedSegmentJoin.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/UnsortedSegmentJoin.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "UnsortedSegmentJoin" input_arg { name: "inputs" diff --git a/tensorflow/core/ops/compat/ops_history_v2/UnsortedSegmentMax.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/UnsortedSegmentMax.pbtxt index fc9ca18d6cfeed..ee8578f289bdc0 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/UnsortedSegmentMax.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/UnsortedSegmentMax.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "UnsortedSegmentMax" input_arg { name: "data" diff --git a/tensorflow/core/ops/compat/ops_history_v2/UnsortedSegmentMin.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/UnsortedSegmentMin.pbtxt index 33e3635173d589..6a8e5ba6d1fbe6 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/UnsortedSegmentMin.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/UnsortedSegmentMin.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "UnsortedSegmentMin" input_arg { name: "data" diff --git a/tensorflow/core/ops/compat/ops_history_v2/UnsortedSegmentProd.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/UnsortedSegmentProd.pbtxt index e1543a33441e81..d100cde127e9f1 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/UnsortedSegmentProd.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/UnsortedSegmentProd.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "UnsortedSegmentProd" input_arg { name: "data" diff --git a/tensorflow/core/ops/compat/ops_history_v2/UnsortedSegmentSum.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/UnsortedSegmentSum.pbtxt index ff0bf2d7a8cf8a..28ddbd3bdec499 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/UnsortedSegmentSum.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/UnsortedSegmentSum.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "UnsortedSegmentSum" input_arg { name: "data" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Unstage.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Unstage.pbtxt index af51a8ce0b8ca6..4bcfd02758ced6 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Unstage.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Unstage.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Unstage" output_arg { name: "values" diff --git a/tensorflow/core/ops/compat/ops_history_v2/UnwrapDatasetVariant.pbtxt 
b/tensorflow/core/ops/compat/ops_history_v2/UnwrapDatasetVariant.pbtxt index fc01d23c863703..10e23a97750a70 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/UnwrapDatasetVariant.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/UnwrapDatasetVariant.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "UnwrapDatasetVariant" input_arg { name: "input_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/UpperBound.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/UpperBound.pbtxt index 5cfd0d536c5238..d1b3fa060c6942 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/UpperBound.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/UpperBound.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "UpperBound" input_arg { name: "sorted_inputs" diff --git a/tensorflow/core/ops/compat/ops_history_v2/VarHandleOp.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/VarHandleOp.pbtxt index acca5ff60992e7..80bc633b5ac613 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/VarHandleOp.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/VarHandleOp.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "VarHandleOp" output_arg { name: "resource" diff --git a/tensorflow/core/ops/compat/ops_history_v2/VarIsInitializedOp.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/VarIsInitializedOp.pbtxt index 975983e81af8ff..395360158262b3 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/VarIsInitializedOp.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/VarIsInitializedOp.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "VarIsInitializedOp" input_arg { name: "resource" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Variable.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Variable.pbtxt index 7ad65f1fe6a3f6..943c24def5944d 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Variable.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Variable.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Variable" output_arg { name: "ref" diff --git a/tensorflow/core/ops/compat/ops_history_v2/VariableShape.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/VariableShape.pbtxt index e0bb7d05dafe25..570b4f241aaa95 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/VariableShape.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/VariableShape.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "VariableShape" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/VariableV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/VariableV2.pbtxt index dabd46dcfb467c..c27112f15887b1 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/VariableV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/VariableV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "VariableV2" output_arg { name: "ref" diff --git a/tensorflow/core/ops/compat/ops_history_v2/WeightedFlatMapDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/WeightedFlatMapDataset.pbtxt index 5c50229e4c5b07..98c853bb2f3e64 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/WeightedFlatMapDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/WeightedFlatMapDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "WeightedFlatMapDataset" input_arg { name: "input_datasets" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Where.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Where.pbtxt index b23dca17037197..8e64cd2419e2c1 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Where.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Where.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Where" input_arg { name: "input" diff --git 
a/tensorflow/core/ops/compat/ops_history_v2/While.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/While.pbtxt index 8e609867f8d94f..807461b00984bd 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/While.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/While.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "While" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/WholeFileReader.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/WholeFileReader.pbtxt index b1513138650504..729d76503e53dd 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/WholeFileReader.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/WholeFileReader.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "WholeFileReader" output_arg { name: "reader_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/WholeFileReaderV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/WholeFileReaderV2.pbtxt index f451cf41c57bc3..2430494342d709 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/WholeFileReaderV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/WholeFileReaderV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "WholeFileReaderV2" output_arg { name: "reader_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/WindowDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/WindowDataset.pbtxt index a5fe3aabbfe45e..43784faad6dc62 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/WindowDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/WindowDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "WindowDataset" input_arg { name: "input_dataset" diff --git a/tensorflow/core/ops/compat/ops_history_v2/WindowOp.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/WindowOp.pbtxt index 60e080a0472b0e..336a13805eaf6a 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/WindowOp.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/WindowOp.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "WindowOp" input_arg { name: "inputs" diff --git a/tensorflow/core/ops/compat/ops_history_v2/WorkerHeartbeat.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/WorkerHeartbeat.pbtxt index bfadb835956750..ae5c7b8caaad6a 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/WorkerHeartbeat.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/WorkerHeartbeat.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "WorkerHeartbeat" input_arg { name: "request" diff --git a/tensorflow/core/ops/compat/ops_history_v2/WrapDatasetVariant.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/WrapDatasetVariant.pbtxt index 247262ec65ce5d..0b1e4363bd20e6 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/WrapDatasetVariant.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/WrapDatasetVariant.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "WrapDatasetVariant" input_arg { name: "input_handle" diff --git a/tensorflow/core/ops/compat/ops_history_v2/WriteAudioSummary.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/WriteAudioSummary.pbtxt index 6ec7f394c44bb2..8cc81eba8ff3a2 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/WriteAudioSummary.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/WriteAudioSummary.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "WriteAudioSummary" input_arg { name: "writer" diff --git a/tensorflow/core/ops/compat/ops_history_v2/WriteFile.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/WriteFile.pbtxt index 55a4c60b10173b..6a15b39873d560 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/WriteFile.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/WriteFile.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "WriteFile" 
input_arg { name: "filename" diff --git a/tensorflow/core/ops/compat/ops_history_v2/WriteGraphSummary.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/WriteGraphSummary.pbtxt index 7851117301a1b3..2957e224f59514 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/WriteGraphSummary.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/WriteGraphSummary.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "WriteGraphSummary" input_arg { name: "writer" diff --git a/tensorflow/core/ops/compat/ops_history_v2/WriteHistogramSummary.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/WriteHistogramSummary.pbtxt index 869b72fc133101..492d573056823b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/WriteHistogramSummary.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/WriteHistogramSummary.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "WriteHistogramSummary" input_arg { name: "writer" diff --git a/tensorflow/core/ops/compat/ops_history_v2/WriteImageSummary.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/WriteImageSummary.pbtxt index 45e3243c6bcf40..1cfc5ca69a7f40 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/WriteImageSummary.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/WriteImageSummary.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "WriteImageSummary" input_arg { name: "writer" diff --git a/tensorflow/core/ops/compat/ops_history_v2/WriteRawProtoSummary.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/WriteRawProtoSummary.pbtxt index 6c571b9f2c7f7a..82ac51a137894b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/WriteRawProtoSummary.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/WriteRawProtoSummary.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "WriteRawProtoSummary" input_arg { name: "writer" diff --git a/tensorflow/core/ops/compat/ops_history_v2/WriteScalarSummary.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/WriteScalarSummary.pbtxt index e40411aabdc256..0f359a85dce91b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/WriteScalarSummary.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/WriteScalarSummary.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "WriteScalarSummary" input_arg { name: "writer" diff --git a/tensorflow/core/ops/compat/ops_history_v2/WriteSummary.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/WriteSummary.pbtxt index f6f887199db2e1..a641ece08df095 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/WriteSummary.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/WriteSummary.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "WriteSummary" input_arg { name: "writer" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Xdivy.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Xdivy.pbtxt index 6536322552738d..898987f947cd3a 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Xdivy.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Xdivy.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Xdivy" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/XlaConcatND.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/XlaConcatND.pbtxt index 7d320b8f5c4544..c571497bcd5535 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/XlaConcatND.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/XlaConcatND.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "XlaConcatND" input_arg { name: "inputs" diff --git a/tensorflow/core/ops/compat/ops_history_v2/XlaHostCompute.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/XlaHostCompute.pbtxt index 787e744ed8350a..9675bda59a86dd 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/XlaHostCompute.pbtxt +++ 
b/tensorflow/core/ops/compat/ops_history_v2/XlaHostCompute.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "XlaHostCompute" input_arg { name: "inputs" diff --git a/tensorflow/core/ops/compat/ops_history_v2/XlaRecvFromHost.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/XlaRecvFromHost.pbtxt index 10c8f79678c334..d3760ea79b1272 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/XlaRecvFromHost.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/XlaRecvFromHost.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "XlaRecvFromHost" output_arg { name: "output" diff --git a/tensorflow/core/ops/compat/ops_history_v2/XlaRecvTPUEmbeddingActivations.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/XlaRecvTPUEmbeddingActivations.pbtxt index 208eea2857258f..b624b26a628bc2 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/XlaRecvTPUEmbeddingActivations.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/XlaRecvTPUEmbeddingActivations.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "XlaRecvTPUEmbeddingActivations" input_arg { name: "deduplication_data" diff --git a/tensorflow/core/ops/compat/ops_history_v2/XlaRecvTPUEmbeddingActivationsV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/XlaRecvTPUEmbeddingActivationsV2.pbtxt index c5abbb2d5f1a4e..2e8fb4d4f2530c 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/XlaRecvTPUEmbeddingActivationsV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/XlaRecvTPUEmbeddingActivationsV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "XlaRecvTPUEmbeddingActivationsV2" input_arg { name: "deduplication_data" diff --git a/tensorflow/core/ops/compat/ops_history_v2/XlaRecvTPUEmbeddingDeduplicationData.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/XlaRecvTPUEmbeddingDeduplicationData.pbtxt index c931212779893b..3c3b92f9d7bff7 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/XlaRecvTPUEmbeddingDeduplicationData.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/XlaRecvTPUEmbeddingDeduplicationData.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "XlaRecvTPUEmbeddingDeduplicationData" output_arg { name: "output" diff --git a/tensorflow/core/ops/compat/ops_history_v2/XlaRecvTPUEmbeddingDeduplicationDataV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/XlaRecvTPUEmbeddingDeduplicationDataV2.pbtxt index 71632c6d871452..d97710b91e46fb 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/XlaRecvTPUEmbeddingDeduplicationDataV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/XlaRecvTPUEmbeddingDeduplicationDataV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "XlaRecvTPUEmbeddingDeduplicationDataV2" output_arg { name: "output" diff --git a/tensorflow/core/ops/compat/ops_history_v2/XlaSendTPUEmbeddingGradients.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/XlaSendTPUEmbeddingGradients.pbtxt index 6da5fb2f5395cb..77f6547229554f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/XlaSendTPUEmbeddingGradients.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/XlaSendTPUEmbeddingGradients.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "XlaSendTPUEmbeddingGradients" input_arg { name: "gradients" diff --git a/tensorflow/core/ops/compat/ops_history_v2/XlaSendTPUEmbeddingGradientsV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/XlaSendTPUEmbeddingGradientsV2.pbtxt index f52b83abc14719..b416d0ad1a8f0c 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/XlaSendTPUEmbeddingGradientsV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/XlaSendTPUEmbeddingGradientsV2.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "XlaSendTPUEmbeddingGradientsV2" input_arg { name: "gradients" diff --git 
a/tensorflow/core/ops/compat/ops_history_v2/XlaSendToHost.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/XlaSendToHost.pbtxt index 90cc16e20a2e95..f2dfeaf444491e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/XlaSendToHost.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/XlaSendToHost.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "XlaSendToHost" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/XlaSparseCoreAdagrad.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/XlaSparseCoreAdagrad.pbtxt index 9cf626b404b1e7..bc20baf287c8b1 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/XlaSparseCoreAdagrad.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/XlaSparseCoreAdagrad.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "XlaSparseCoreAdagrad" input_arg { name: "indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/XlaSparseCoreAdagradMomentum.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/XlaSparseCoreAdagradMomentum.pbtxt index b64460485d5ac9..5e2b17ab4c9238 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/XlaSparseCoreAdagradMomentum.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/XlaSparseCoreAdagradMomentum.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "XlaSparseCoreAdagradMomentum" input_arg { name: "indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/XlaSparseCoreAdam.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/XlaSparseCoreAdam.pbtxt index 38af8afcc1d10d..625c164bb20ed3 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/XlaSparseCoreAdam.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/XlaSparseCoreAdam.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "XlaSparseCoreAdam" input_arg { name: "embedding_table" diff --git a/tensorflow/core/ops/compat/ops_history_v2/XlaSparseCoreFtrl.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/XlaSparseCoreFtrl.pbtxt index afbf9e024d7041..b65b707befee31 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/XlaSparseCoreFtrl.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/XlaSparseCoreFtrl.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "XlaSparseCoreFtrl" input_arg { name: "embedding_table" diff --git a/tensorflow/core/ops/compat/ops_history_v2/XlaSparseCoreSgd.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/XlaSparseCoreSgd.pbtxt index 7f507c7d722106..677ce14ddf039d 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/XlaSparseCoreSgd.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/XlaSparseCoreSgd.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "XlaSparseCoreSgd" input_arg { name: "indices" diff --git a/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmul.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmul.pbtxt index 5ecf0c2472748a..90aa2cf22849b3 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmul.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmul.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "XlaSparseDenseMatmul" input_arg { name: "row_ids" diff --git a/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithAdagradAndCsrInput.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithAdagradAndCsrInput.pbtxt index e13cbfcff32417..fbf266b10e35fa 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithAdagradAndCsrInput.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithAdagradAndCsrInput.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "XlaSparseDenseMatmulGradWithAdagradAndCsrInput" input_arg { name: "row_pointers" diff 
--git a/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithAdagradAndStaticBufferSize.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithAdagradAndStaticBufferSize.pbtxt index 182bfdbf3bcab2..359a038ea9b6b7 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithAdagradAndStaticBufferSize.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithAdagradAndStaticBufferSize.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "XlaSparseDenseMatmulGradWithAdagradAndStaticBufferSize" input_arg { name: "row_pointers" diff --git a/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithAdagradMomentumAndCsrInput.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithAdagradMomentumAndCsrInput.pbtxt index e6f2eed0c1d75b..5150a4f23b598f 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithAdagradMomentumAndCsrInput.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithAdagradMomentumAndCsrInput.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "XlaSparseDenseMatmulGradWithAdagradMomentumAndCsrInput" input_arg { name: "row_pointers" diff --git a/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithAdagradMomentumAndStaticBufferSize.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithAdagradMomentumAndStaticBufferSize.pbtxt index 3bd492cc55a70c..4fd6fa9bb5a5b2 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithAdagradMomentumAndStaticBufferSize.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithAdagradMomentumAndStaticBufferSize.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "XlaSparseDenseMatmulGradWithAdagradMomentumAndStaticBufferSize" input_arg { name: "row_pointers" diff --git a/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithAdamAndCsrInput.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithAdamAndCsrInput.pbtxt index 202e6f4f4f7b09..aaa27b25954a9e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithAdamAndCsrInput.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithAdamAndCsrInput.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "XlaSparseDenseMatmulGradWithAdamAndCsrInput" input_arg { name: "row_pointers" diff --git a/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithAdamAndStaticBufferSize.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithAdamAndStaticBufferSize.pbtxt index f058e30f800527..5024f72b5c66cb 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithAdamAndStaticBufferSize.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithAdamAndStaticBufferSize.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "XlaSparseDenseMatmulGradWithAdamAndStaticBufferSize" input_arg { name: "row_pointers" diff --git a/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithFtrlAndCsrInput.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithFtrlAndCsrInput.pbtxt index 96121a6bab883d..261f25bebfd7df 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithFtrlAndCsrInput.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithFtrlAndCsrInput.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "XlaSparseDenseMatmulGradWithFtrlAndCsrInput" input_arg { name: "row_pointers" 
diff --git a/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithFtrlAndStaticBufferSize.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithFtrlAndStaticBufferSize.pbtxt index 23c6d671dd7914..f2f57f2f744d7b 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithFtrlAndStaticBufferSize.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithFtrlAndStaticBufferSize.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "XlaSparseDenseMatmulGradWithFtrlAndStaticBufferSize" input_arg { name: "row_pointers" diff --git a/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithSgdAndCsrInput.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithSgdAndCsrInput.pbtxt index 3ad518fadb629e..9446a6fa98c515 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithSgdAndCsrInput.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithSgdAndCsrInput.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "XlaSparseDenseMatmulGradWithSgdAndCsrInput" input_arg { name: "row_pointers" diff --git a/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithSgdAndStaticBufferSize.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithSgdAndStaticBufferSize.pbtxt index 46a1fe8d6e2f6a..dbb06c95f6d643 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithSgdAndStaticBufferSize.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulGradWithSgdAndStaticBufferSize.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "XlaSparseDenseMatmulGradWithSgdAndStaticBufferSize" input_arg { name: "row_pointers" diff --git a/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulWithCsrInput.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulWithCsrInput.pbtxt index 1aa1743718a32a..2b4bc1dcba74ac 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulWithCsrInput.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulWithCsrInput.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "XlaSparseDenseMatmulWithCsrInput" input_arg { name: "row_pointers" diff --git a/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulWithStaticBufferSize.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulWithStaticBufferSize.pbtxt index 85888d026c595c..471ded1635244a 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulWithStaticBufferSize.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/XlaSparseDenseMatmulWithStaticBufferSize.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "XlaSparseDenseMatmulWithStaticBufferSize" input_arg { name: "row_pointers" diff --git a/tensorflow/core/ops/compat/ops_history_v2/XlaSplitND.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/XlaSplitND.pbtxt index d4d5391b1340df..353b6e166d0f2e 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/XlaSplitND.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/XlaSplitND.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "XlaSplitND" input_arg { name: "input" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Xlog1py.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Xlog1py.pbtxt index f37a09eea17228..9fe466c2fb8165 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Xlog1py.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Xlog1py.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Xlog1py" input_arg { name: "x" diff --git 
a/tensorflow/core/ops/compat/ops_history_v2/Xlogy.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Xlogy.pbtxt index 8da356ec49fe8c..8e7df823ff73dc 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Xlogy.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Xlogy.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Xlogy" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ZerosLike.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ZerosLike.pbtxt index c1200273821a51..5bb8d0ab3781d5 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ZerosLike.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ZerosLike.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ZerosLike" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/Zeta.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/Zeta.pbtxt index 7f86bd29b404cd..c391bd1f22c6a1 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/Zeta.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/Zeta.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "Zeta" input_arg { name: "x" diff --git a/tensorflow/core/ops/compat/ops_history_v2/ZipDataset.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ZipDataset.pbtxt index 60efabb2d58f62..5dd34535a666d3 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/ZipDataset.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/ZipDataset.pbtxt @@ -1,4 +1,4 @@ -op { +op { name: "ZipDataset" input_arg { name: "input_datasets" diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index c8c55e08ca0d32..c0a6aa926c51e0 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -1,4 +1,4 @@ -go/debugonly +go/debugproto op { name: "Abort" attr { @@ -60061,6 +60061,8 @@ op { type: DT_DOUBLE type: DT_INT32 type: DT_INT64 + type: DT_UINT32 + type: DT_UINT64 } } } From 52911fba4c7e2852dc36a601fc3918263f552e2c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 May 2024 11:44:50 -0700 Subject: [PATCH 125/478] Update visibility in third_party/tensorflow/python/platform/BUILD. PiperOrigin-RevId: 633657239 --- tensorflow/python/platform/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/platform/BUILD b/tensorflow/python/platform/BUILD index df26d8b84ec5a3..0ca7e7bfae738f 100644 --- a/tensorflow/python/platform/BUILD +++ b/tensorflow/python/platform/BUILD @@ -15,6 +15,7 @@ visibility = [ # copybara:uncomment "//learning/brain/contrib/eager/numlib/benchmarks/kumamon:__subpackages__", # copybara:uncomment "//learning/brain/mobile/lite/tooling/model_analyzer:__subpackages__", # copybara:uncomment "//tensorflow_serving/model_servers:__subpackages__", + # copybara:uncomment "//third_party/odml/model_customization/quantization:__subpackages__", ] package( From f6a2b933193c44bf382e47e099122deccc4d2f8c Mon Sep 17 00:00:00 2001 From: "Dimitar (Mitko) Asenov" Date: Tue, 14 May 2024 11:54:14 -0700 Subject: [PATCH 126/478] [XLA:GPU] Remove the deprecated xla flag `--xla_gpu_simplify_all_fp_conversions`. Use `--xla_allow_excess_precision` instead. 
PiperOrigin-RevId: 633659944
---
 third_party/xla/xla/debug_options_flags.cc | 6 ------
 third_party/xla/xla/service/gpu/gpu_compiler.cc | 6 ++----
 third_party/xla/xla/xla.proto | 3 +--
 3 files changed, 3 insertions(+), 12 deletions(-)

diff --git a/third_party/xla/xla/debug_options_flags.cc b/third_party/xla/xla/debug_options_flags.cc
index aa95aa8a3f6120..aef3c9ef09de2c 100644
--- a/third_party/xla/xla/debug_options_flags.cc
+++ b/third_party/xla/xla/debug_options_flags.cc
@@ -147,7 +147,6 @@ DebugOptions DefaultDebugOptionsIgnoringFlags() {
   opts.set_xla_gpu_redzone_padding_bytes(8 * 1024 * 1024);
   opts.set_xla_gpu_shape_checks(DebugOptions::RUNTIME);
   opts.set_xla_gpu_normalize_layouts(true);
-  opts.set_xla_gpu_simplify_all_fp_conversions(true);
   opts.set_xla_dump_latency_hiding_schedule(false);
   opts.set_xla_gpu_enable_latency_hiding_scheduler(false);
   opts.set_xla_gpu_lhs_enable_gpu_async_tracker(true);
@@ -1229,11 +1228,6 @@ void MakeDebugOptionsFlags(std::vector<tsl::Flag>* flag_list,
       "Amount of padding the redzone allocator will put on one side of each "
       "buffer it allocates. (So the buffer's total size will be increased by "
       "2x this value.)"));
-  flag_list->push_back(tsl::Flag(
-      "xla_gpu_simplify_all_fp_conversions",
-      bool_setter_for(&DebugOptions::set_xla_gpu_simplify_all_fp_conversions),
-      debug_options->xla_gpu_simplify_all_fp_conversions(),
-      "Allows any chain of floating-point conversions to be simplified."));
   flag_list->push_back(tsl::Flag(
       "xla_gpu_shape_checks", setter_for_xla_gpu_shape_checks,
       DebugOptions::ShapeChecks_Name(debug_options->xla_gpu_shape_checks()),
diff --git a/third_party/xla/xla/service/gpu/gpu_compiler.cc b/third_party/xla/xla/service/gpu/gpu_compiler.cc
index 6d293297d2ffad..542c2ae0524d6f 100644
--- a/third_party/xla/xla/service/gpu/gpu_compiler.cc
+++ b/third_party/xla/xla/service/gpu/gpu_compiler.cc
@@ -1324,8 +1324,7 @@ absl::Status GpuCompiler::OptimizeHloPostLayoutAssignment(
     sub_pipeline.AddPass<FloatNormalization>(&f8e5m2fnuz_support);
     sub_pipeline.AddPass<FloatNormalization>(&f8e4m3fnuz_support);
     // Remove `f32 -> bf16 -> f32` casts inserted by bf16 normalization.
-    if (debug_options.xla_allow_excess_precision() &&
-        debug_options.xla_gpu_simplify_all_fp_conversions()) {
+    if (debug_options.xla_allow_excess_precision()) {
       sub_pipeline.AddPass<SimplifyFPConversions>();
     }
   };
@@ -1466,8 +1465,7 @@ absl::Status GpuCompiler::OptimizeHloPostLayoutAssignment(
   pipeline.AddPass<HloPassFix<GpuAlgebraicSimplifier>>(simplifier_options,
                                                        gpu_version);
-  if (debug_options.xla_allow_excess_precision() &&
-      debug_options.xla_gpu_simplify_all_fp_conversions()) {
+  if (debug_options.xla_allow_excess_precision()) {
     // This pass cleans up chains of compiler-generated converts
     // (i.e. f32 -> bf16 -> f32) that have been produced by the algebraic
     // simplifier by rearranging ops (i.e. by pushing broadcasts towards the
diff --git a/third_party/xla/xla/xla.proto b/third_party/xla/xla/xla.proto
index 56ddedc2d6416b..4e0abf59b50906 100644
--- a/third_party/xla/xla/xla.proto
+++ b/third_party/xla/xla/xla.proto
@@ -519,8 +519,7 @@ message DebugOptions {
   // scratch), so this can be multiplied by quite a lot.
   int64 xla_gpu_redzone_padding_bytes = 228;
-  // Deprecated. Use xla_allow_excess_precision instead.
-  bool xla_gpu_simplify_all_fp_conversions = 168 [deprecated = true];
+  reserved 168;  // Was xla_gpu_simplify_all_fp_conversions.
   // An experimental option to force all layouts present in the
   // after-optimizations HLO to be descending, e.g.
From e3e649bf7b7e48ca39fa49065e7b7db3b90a3f8a Mon Sep 17 00:00:00 2001
From: Deqiang Chen
Date: Tue, 14 May 2024 12:01:51 -0700
Subject: [PATCH 127/478] Reduce messages from sharding_utils.cc

PiperOrigin-RevId: 633662509
---
 tensorflow/core/tfrt/ifrt/sharding_utils.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/tfrt/ifrt/sharding_utils.cc b/tensorflow/core/tfrt/ifrt/sharding_utils.cc
index 03cc5f953edb38..13e0c2941f4201 100644
--- a/tensorflow/core/tfrt/ifrt/sharding_utils.cc
+++ b/tensorflow/core/tfrt/ifrt/sharding_utils.cc
@@ -174,8 +174,8 @@ SplitAndCreateArraysFromHostBuffer(
             kImmutableUntilTransferCompletes,
             [tensor, slice_idx]() {
               // Keep tensor alive
-              LOG(INFO) << "Done with host buffer for slice " << slice_idx
-                        << " at " << tensor.data();
+              VLOG(2) << "Done with host buffer for slice " << slice_idx
+                      << " at " << tensor.data();
             }));
     arrays.push_back(std::move(array));
     device_iter++;

From b4a18a16059f0940bb9f57b520dff678aacf19ac Mon Sep 17 00:00:00 2001
From: Rohit Upadhyaya
Date: Tue, 14 May 2024 12:17:34 -0700
Subject: [PATCH 128/478] Cleanup change: tf::Status is an alias for absl::Status. Add tests

PiperOrigin-RevId: 633667291
---
 tensorflow/core/tfrt/utils/BUILD | 3 ++-
 tensorflow/core/tfrt/utils/error_util.cc | 6 ------
 tensorflow/core/tfrt/utils/error_util.h | 2 --
 tensorflow/core/tfrt/utils/error_util_test.cc | 10 ++++++++++
 4 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/tensorflow/core/tfrt/utils/BUILD b/tensorflow/core/tfrt/utils/BUILD
index 7517e07928f87b..7c5cf7a46579a5 100644
--- a/tensorflow/core/tfrt/utils/BUILD
+++ b/tensorflow/core/tfrt/utils/BUILD
@@ -155,9 +155,10 @@ tf_cc_test(
     deps = [
         ":error_util",
         "//tensorflow/core/platform:status",
+        "@com_google_absl//absl/status",
         "@com_google_googletest//:gtest_main",
+        "@local_xla//xla/tsl/concurrency:async_value",
         "@tf_runtime//:support",
-        "@tf_runtime//cpp_tests:common",
     ],
 )

diff --git a/tensorflow/core/tfrt/utils/error_util.cc b/tensorflow/core/tfrt/utils/error_util.cc
index 1f8fb6a6770570..2530b98f051041 100644
--- a/tensorflow/core/tfrt/utils/error_util.cc
+++ b/tensorflow/core/tfrt/utils/error_util.cc
@@ -47,10 +47,4 @@ tensorflow::Status ToTfStatus(const tfrt::AsyncValue* av) {
   return absl::OkStatus();
 }

-absl::Status AbslStatusFromTfStatus(tensorflow::Status status) {
-  if (status.ok()) return absl::OkStatus();
-  return absl::Status(static_cast<absl::StatusCode>(status.code()),
-                      status.message());
-}
-
 }  // namespace tfrt
diff --git a/tensorflow/core/tfrt/utils/error_util.h b/tensorflow/core/tfrt/utils/error_util.h
index e694931f82e2e5..ee7bcd81dd913f 100644
--- a/tensorflow/core/tfrt/utils/error_util.h
+++ b/tensorflow/core/tfrt/utils/error_util.h
@@ -76,8 +76,6 @@ inline llvm::Error MakeStatusError(tensorflow::Status status) {
   return MakeStringError(MakeStatusString(status));
 }

-absl::Status AbslStatusFromTfStatus(tensorflow::Status status);
-
 }  // namespace tfrt

 #endif  // TENSORFLOW_CORE_TFRT_UTILS_ERROR_UTIL_H_
diff --git a/tensorflow/core/tfrt/utils/error_util_test.cc b/tensorflow/core/tfrt/utils/error_util_test.cc
index 07c65905825a53..06edb63c897af4 100644
--- a/tensorflow/core/tfrt/utils/error_util_test.cc
+++ b/tensorflow/core/tfrt/utils/error_util_test.cc
@@ -16,6 +16,8 @@ limitations under the License.
#include #include +#include "absl/status/status.h" +#include "xla/tsl/concurrency/async_value_ref.h" #include "tensorflow/core/platform/status.h" #include "tfrt/support/error_util.h" // from @tf_runtime @@ -38,5 +40,13 @@ TEST(ErrorUtilTest, UnsupportedErrorConversion) { tfrt::ErrorCode::kUnknown); } +TEST(ErrorUtilTest, ToTfStatusError) { + auto error_av = + tsl::MakeErrorAsyncValueRef(absl::UnauthenticatedError("test_error")); + auto status = ToTfStatus(error_av.get()); + EXPECT_EQ(status.code(), absl::StatusCode::kUnauthenticated); + EXPECT_EQ(status.message(), "test_error"); +} + } // namespace } // namespace tfrt From d7a56fffe080617331aaf3f1b5d172f62babf0d5 Mon Sep 17 00:00:00 2001 From: Kuy Mainwaring Date: Tue, 14 May 2024 12:28:42 -0700 Subject: [PATCH 129/478] [XLA:GPU] Clang-tidy fixes for xla/service/gpu/runtime/fft_thunk.cc PiperOrigin-RevId: 633670507 --- third_party/xla/xla/service/gpu/runtime/BUILD | 8 +++----- .../xla/xla/service/gpu/runtime/fft_thunk.cc | 15 ++++++++++++++- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/third_party/xla/xla/service/gpu/runtime/BUILD b/third_party/xla/xla/service/gpu/runtime/BUILD index 4175990422e7bb..e53035e558ee9d 100644 --- a/third_party/xla/xla/service/gpu/runtime/BUILD +++ b/third_party/xla/xla/service/gpu/runtime/BUILD @@ -613,25 +613,23 @@ cc_library( srcs = ["fft_thunk.cc"], hdrs = ["fft_thunk.h"], deps = [ + ":thunk", "//xla:shape_util", + "//xla:status_macros", "//xla:types", "//xla:util", "//xla:xla_data_proto_cc", - "//xla/hlo/ir:hlo", "//xla/service:buffer_assignment", - "//xla/service/gpu:buffer_allocations", - "//xla/service/gpu/runtime:thunk", "//xla/stream_executor", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", - "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/synchronization", "@com_google_absl//absl/types:span", "@local_tsl//tsl/platform:logging", - "@local_tsl//tsl/platform:status", + "@local_tsl//tsl/platform:statusor", ], ) diff --git a/third_party/xla/xla/service/gpu/runtime/fft_thunk.cc b/third_party/xla/xla/service/gpu/runtime/fft_thunk.cc index 728c36752aeed5..7d620522146acf 100644 --- a/third_party/xla/xla/service/gpu/runtime/fft_thunk.cc +++ b/third_party/xla/xla/service/gpu/runtime/fft_thunk.cc @@ -15,17 +15,30 @@ limitations under the License. #include "xla/service/gpu/runtime/fft_thunk.h" +#include +#include #include #include "absl/status/status.h" #include "absl/status/statusor.h" -#include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" +#include "absl/synchronization/mutex.h" +#include "absl/types/span.h" +#include "xla/service/buffer_assignment.h" +#include "xla/service/gpu/runtime/thunk.h" +#include "xla/shape.h" +#include "xla/shape_util.h" +#include "xla/status_macros.h" +#include "xla/stream_executor/blas.h" +#include "xla/stream_executor/device_memory.h" +#include "xla/stream_executor/device_memory_allocator.h" +#include "xla/stream_executor/fft.h" #include "xla/stream_executor/scratch_allocator.h" #include "xla/stream_executor/stream_executor.h" #include "xla/types.h" #include "xla/util.h" #include "tsl/platform/logging.h" +#include "tsl/platform/statusor.h" namespace xla { namespace gpu { From 3d1bd4049fc906071088c1d560b75ea77e898a62 Mon Sep 17 00:00:00 2001 From: Gunhyun Park Date: Tue, 14 May 2024 12:31:06 -0700 Subject: [PATCH 130/478] Add unbounded dynamism test for AllToAllOp. 
PiperOrigin-RevId: 633671113
---
 third_party/xla/xla/client/xla_builder.cc | 71 ++++++++++-
 third_party/xla/xla/client/xla_builder.h | 5 +-
 .../xla/xla/client/xla_builder_test.cc | 120 ++++++++++++++++++
 .../xla/xla/service/shape_inference.cc | 30 ++++-
 .../xla/xla/service/shape_inference_test.cc | 26 ++++
 5 files changed, 241 insertions(+), 11 deletions(-)

diff --git a/third_party/xla/xla/client/xla_builder.cc b/third_party/xla/xla/client/xla_builder.cc
index ecc6b86dfcb575..9d32c48ea9afad 100644
--- a/third_party/xla/xla/client/xla_builder.cc
+++ b/third_party/xla/xla/client/xla_builder.cc
@@ -15,6 +15,7 @@ limitations under the License.

 #include "xla/client/xla_builder.h"

+#include
 #include
 #include
 #include
@@ -3789,15 +3790,55 @@ XlaOp XlaBuilder::AllToAllArray(
       return all_to_all;
     }
     DimensionVector sizes;
+    const bool is_unbounded = operand_shape->is_unbounded_dynamic();
+    std::vector<XlaOp> dynamic_sizes;
+    auto GetR1DimensionSizeOrConstant = [&](XlaOp operand,
+                                            int64_t dimension) -> XlaOp {
+      if (operand_shape->is_unbounded_dynamic_dimension(dimension)) {
+        return Reshape(GetDimensionSize(operand, dimension), {1});
+      }
+      return ConstantR1<int32_t>(
+          this, {static_cast<int32_t>(operand_shape->dimensions(dimension))});
+    };
+    XlaOp r1_split_count =
+        ConstantR1<int32_t>(this, {static_cast<int32_t>(split_count)});
     for (int64_t i = 0; i < operand_shape->rank(); ++i) {
       if (i != split_dimension) {
         sizes.push_back(operand_shape->dimensions(i));
+        if (is_unbounded) {
+          dynamic_sizes.push_back(GetR1DimensionSizeOrConstant(operand, i));
+        }
         continue;
       }
       sizes.push_back(split_count);
-      sizes.push_back(operand_shape->dimensions(i) / split_count);
+      sizes.push_back(operand_shape->is_unbounded_dynamic_dimension(i)
+                          ? Shape::kUnboundedSize
+                          : operand_shape->dimensions(i) / split_count);
+
+      if (is_unbounded) {
+        dynamic_sizes.push_back(r1_split_count);
+        dynamic_sizes.push_back(
+            operand_shape->is_unbounded_dynamic_dimension(i)
+                ?
Div(GetR1DimensionSizeOrConstant(operand, i), r1_split_count)
+                : ConstantR1<int32_t>(this,
+                                      {static_cast<int32_t>(sizes.back())}));
+      }
+    }
+
+    if (is_unbounded) {
+      std::vector<bool> dynamic_dimensions;
+      std::transform(
+          sizes.begin(), sizes.end(), std::back_inserter(dynamic_dimensions),
+          [](int64_t size) { return size == Shape::kUnboundedSize; });
+      TF_ASSIGN_OR_RETURN(
+          const Shape shape,
+          ShapeUtil::MakeValidatedShape(all_to_all_shape.element_type(), sizes,
+                                        dynamic_dimensions));
+      all_to_all =
+          MhloDynamicReshape(all_to_all, ConcatInDim(dynamic_sizes, 0), shape);
+    } else {
+      all_to_all = Reshape(all_to_all, sizes);
    }
-    all_to_all = Reshape(all_to_all, sizes);

     std::vector<int64_t> permutation;
     const auto rank = operand_shape->rank();
@@ -3810,6 +3851,21 @@ XlaOp XlaBuilder::AllToAllArray(
       permutation.push_back(dim_after_reshape);
     }
     all_to_all = Transpose(all_to_all, permutation);
+
+    if (is_unbounded) {
+      std::vector<XlaOp> new_dimensions;
+      for (int64_t i = 0; i < operand_shape->rank(); ++i) {
+        new_dimensions.push_back(GetR1DimensionSizeOrConstant(operand, i));
+      }
+      new_dimensions[split_dimension] =
+          Div(new_dimensions[split_dimension], r1_split_count);
+      new_dimensions[concat_dimension] =
+          Mul(new_dimensions[concat_dimension], r1_split_count);
+
+      return MhloDynamicReshape(all_to_all, ConcatInDim(new_dimensions, 0),
+                                all_to_all_shape);
+    }
+
     return Reshape(all_to_all_shape, all_to_all);
   });
 }
@@ -3865,6 +3921,13 @@ XlaOp XlaBuilder::AllToAllTuple(
     const std::optional<ChannelHandle>& channel_id) {
   return ReportErrorOrReturn([&]() -> absl::StatusOr<XlaOp> {
     TF_ASSIGN_OR_RETURN(const Shape* operand_shape, GetShapePtr(operand));
+    if (operand_shape->is_unbounded_dynamic() ||
+        split_dimension == Shape::kUnboundedSize ||
+        concat_dimension == Shape::kUnboundedSize ||
+        split_count == Shape::kUnboundedSize) {
+      return InvalidArgument(
+          "AllToAllTuple does not support unbounded dynamic shapes");
+    }

     // The HloInstruction for AllToAll currently only handles the data
     // communication: it accepts N already split parts and scatters them to N
@@ -3890,14 +3953,14 @@ XlaOp XlaBuilder::AllToAllTuple(
     }

     // Handle data communication.
-    XlaOp alltoall =
+    XlaOp all_to_all =
         this->AllToAllTuple(slices, replica_groups, layout, channel_id);

     // Concat the N received parts.
     std::vector<XlaOp> received;
     received.reserve(split_count);
     for (int i = 0; i < split_count; i++) {
-      received.push_back(this->GetTupleElement(alltoall, i));
+      received.push_back(this->GetTupleElement(all_to_all, i));
     }
     return this->ConcatInDim(received, concat_dimension);
   });
 }
diff --git a/third_party/xla/xla/client/xla_builder.h b/third_party/xla/xla/client/xla_builder.h
index 571904f7cd7995..84fa7588c69016 100644
--- a/third_party/xla/xla/client/xla_builder.h
+++ b/third_party/xla/xla/client/xla_builder.h
@@ -2564,7 +2564,10 @@ XlaOp ReduceScatter(
     const std::optional<Layout>& layout = std::nullopt,
     std::optional<bool> use_global_device_ids = std::nullopt);

-// Enqueues an operation that do an Alltoall of the operand cross cores.
+// Enqueues an operation that does an AllToAll of the operand across cores.
+// This involves AllToAll, followed by Reshape, Transpose, and another Reshape
+// to get proper codegen. See implementation for additional details.
+//
 // An optional `layout` can be specified to force the layout of the instruction.
 // This is used to guarantee the same layout for a group of AllToAll ops
 // compiled separately.
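The header comment above only names the Reshape/Transpose/Reshape steps; the following standalone sketch (plain C++, independent of XLA, with every name chosen for illustration rather than taken from the patch) traces the same shape bookkeeping for a static f32[6,5] operand with split_dimension=0, concat_dimension=1 and split_count=3:

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  const int64_t split_dimension = 0, concat_dimension = 1, split_count = 3;
  std::vector<int64_t> dims = {6, 5};  // Shape of the example operand.

  // Step 1: Reshape splits `split_dimension` into
  // (split_count, size / split_count).
  std::vector<int64_t> reshaped;
  for (int64_t i = 0; i < static_cast<int64_t>(dims.size()); ++i) {
    if (i == split_dimension) {
      reshaped.push_back(split_count);
      reshaped.push_back(dims[i] / split_count);
    } else {
      reshaped.push_back(dims[i]);
    }
  }  // reshaped == {3, 2, 5}

  // Step 2: Transpose moves the split_count dimension in front of
  // concat_dimension, using the same permutation rule as AllToAllArray.
  std::vector<int64_t> permutation;
  for (int64_t i = 0; i < static_cast<int64_t>(dims.size()); ++i) {
    const int64_t dim_after_reshape = i >= split_dimension ? i + 1 : i;
    if (i == concat_dimension) permutation.push_back(split_dimension);
    permutation.push_back(dim_after_reshape);
  }  // permutation == {1, 0, 2}
  std::vector<int64_t> transposed;
  for (int64_t p : permutation) transposed.push_back(reshaped[p]);
  // transposed == {2, 3, 5}

  // Step 3: Reshape folds split_count into concat_dimension.
  std::vector<int64_t> result = dims;
  result[split_dimension] /= split_count;
  result[concat_dimension] *= split_count;
  for (int64_t d : result) std::cout << d << ' ';  // prints "2 15"
  std::cout << '\n';
  return 0;
}

The first Reshape exposes the split pieces as a new major dimension ({6,5} to {3,2,5}), the Transpose moves that dimension next to concat_dimension ({3,2,5} to {2,3,5}), and the final Reshape folds it in, yielding {2,15}; the unbounded path added above performs the same arithmetic at runtime via MhloDynamicReshape and GetDimensionSize.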
diff --git a/third_party/xla/xla/client/xla_builder_test.cc b/third_party/xla/xla/client/xla_builder_test.cc
index 3aa8c1ca00c5aa..c1c9640aa841f1 100644
--- a/third_party/xla/xla/client/xla_builder_test.cc
+++ b/third_party/xla/xla/client/xla_builder_test.cc
@@ -1999,6 +1999,126 @@ TEST(XlaBuilderTest, UnboundedAllReduce) {
               GmockMatch(m::Op().WithShapeEqualTo(&expected)));
 }

+TEST(XlaBuilderTest, UnboundedAllToAllDynamicSplitDimension) {
+  XlaBuilder b(TestName());
+  TF_ASSERT_OK_AND_ASSIGN(const Shape operand, ParseShape("f32[?, 15]"));
+  TF_ASSERT_OK_AND_ASSIGN(const Shape expected, ParseShape("f32[?, 45]"));
+  AllToAll(/*operand=*/Parameter(&b, 0, operand, "operand"),
+           /*split_dimension=*/0,
+           /*concat_dimension=*/1,
+           /*split_count=*/3,
+           /*replica_groups=*/{});
+  TF_ASSERT_OK_AND_ASSIGN(const std::unique_ptr<HloModule> module,
+                          BuildHloModule(b));
+  EXPECT_THAT(GetRoot(*module),
+              GmockMatch(m::Op().WithShapeEqualTo(&expected)));
+}
+
+TEST(XlaBuilderTest, UnboundedAllToAllDynamicConcatDimension) {
+  XlaBuilder b(TestName());
+  TF_ASSERT_OK_AND_ASSIGN(const Shape operand, ParseShape("f32[?, 15]"));
+  TF_ASSERT_OK_AND_ASSIGN(const Shape expected, ParseShape("f32[?, 5]"));
+  AllToAll(/*operand=*/Parameter(&b, 0, operand, "operand"),
+           /*split_dimension=*/1,
+           /*concat_dimension=*/0,
+           /*split_count=*/3,
+           /*replica_groups=*/{});
+  TF_ASSERT_OK_AND_ASSIGN(const std::unique_ptr<HloModule> module,
+                          BuildHloModule(b));
+  EXPECT_THAT(GetRoot(*module),
+              GmockMatch(m::Op().WithShapeEqualTo(&expected)));
+}
+
+TEST(XlaBuilderTest, UnboundedAllToAllDynamicSplitAndConcatDimensionEqual) {
+  XlaBuilder b(TestName());
+  TF_ASSERT_OK_AND_ASSIGN(const Shape operand, ParseShape("f32[?, 15]"));
+  TF_ASSERT_OK_AND_ASSIGN(const Shape expected, ParseShape("f32[?, 15]"));
+  AllToAll(/*operand=*/Parameter(&b, 0, operand, "operand"),
+           /*split_dimension=*/0,
+           /*concat_dimension=*/0,
+           /*split_count=*/3,
+           /*replica_groups=*/{});
+  TF_ASSERT_OK_AND_ASSIGN(const std::unique_ptr<HloModule> module,
+                          BuildHloModule(b));
+  EXPECT_THAT(GetRoot(*module),
+              GmockMatch(m::Op().WithShapeEqualTo(&expected)));
+}
+
+TEST(XlaBuilderTest, UnboundedAllToAllFullyDynamic) {
+  XlaBuilder b(TestName());
+  TF_ASSERT_OK_AND_ASSIGN(const Shape operand, ParseShape("f32[?, ?]"));
+  TF_ASSERT_OK_AND_ASSIGN(const Shape expected, ParseShape("f32[?, ?]"));
+  AllToAll(/*operand=*/Parameter(&b, 0, operand, "operand"),
+           /*split_dimension=*/0,
+           /*concat_dimension=*/1,
+           /*split_count=*/3,
+           /*replica_groups=*/{});
+  TF_ASSERT_OK_AND_ASSIGN(const std::unique_ptr<HloModule> module,
+                          BuildHloModule(b));
+  EXPECT_THAT(GetRoot(*module),
+              GmockMatch(m::Op().WithShapeEqualTo(&expected)));
+}
+
+TEST(XlaBuilderTest, UnboundedAllToAllTupleVariadicUnsupported) {
+  XlaBuilder b(TestName());
+  TF_ASSERT_OK_AND_ASSIGN(const Shape operand, ParseShape("f32[?, 15]{1,0}"));
+  b.ReportErrorOrReturn(
+      AllToAllTuple(/*operands=*/{Parameter(&b, 0, operand, "operand0"),
+                                  Parameter(&b, 1, operand, "operand1")},
+                    /*replica_groups=*/{}));
+  EXPECT_THAT(
+      BuildHloModule(b),
+      StatusIs(_,
+               HasSubstr(
+                   "AllToAllTuple does not support unbounded dynamic shapes")));
+}
+
+TEST(XlaBuilderTest, UnboundedAllToAllTupleUnsupported) {
+  XlaBuilder b(TestName());
+  TF_ASSERT_OK_AND_ASSIGN(const Shape operand, ParseShape("f32[?, 15]{1,0}"));
+  b.ReportErrorOrReturn(
+      AllToAllTuple(/*operand=*/Parameter(&b, 0, operand, "operand"),
+                    /*split_dimension=*/0,
+                    /*concat_dimension=*/1,
+                    /*split_count=*/3,
+                    /*replica_groups=*/{}));
+  EXPECT_THAT(
+      BuildHloModule(b),
+      StatusIs(_,
+               HasSubstr(
+                   "AllToAllTuple
does not support unbounded dynamic shapes"))); +} + +TEST(XlaBuilderTest, BoundedAllToAllTupleUnsupported) { + XlaBuilder b(TestName()); + TF_ASSERT_OK_AND_ASSIGN(const Shape operand, ParseShape("f32[3, <=15]{1,0}")); + b.ReportErrorOrReturn( + AllToAllTuple(/*operand=*/Parameter(&b, 0, operand, "operand"), + /*split_dimension=*/0, + /*concat_dimension=*/1, + /*split_count=*/3, + /*replica_groups=*/{})); + EXPECT_THAT( + BuildHloModule(b), + StatusIs(_, + HasSubstr("AllToAll does not support bounded dynamic shapes"))); +} + +TEST(XlaBuilderTest, BoundedAllToAllUnsupported) { + XlaBuilder b(TestName()); + TF_ASSERT_OK_AND_ASSIGN(const Shape operand, ParseShape("f32[3, <=15]{1,0}")); + b.ReportErrorOrReturn( + AllToAllTuple(/*operand=*/Parameter(&b, 0, operand, "operand"), + /*split_dimension=*/0, + /*concat_dimension=*/1, + /*split_count=*/3, + /*replica_groups=*/{})); + EXPECT_THAT( + BuildHloModule(b), + StatusIs(_, + HasSubstr("AllToAll does not support bounded dynamic shapes"))); +} + TEST(XlaBuilderTest, UnboundedAnd) { XlaBuilder b(TestName()); TF_ASSERT_OK_AND_ASSIGN(const Shape lhs, diff --git a/third_party/xla/xla/service/shape_inference.cc b/third_party/xla/xla/service/shape_inference.cc index a6ba2645d35273..10d2f5d3f75b44 100644 --- a/third_party/xla/xla/service/shape_inference.cc +++ b/third_party/xla/xla/service/shape_inference.cc @@ -2492,6 +2492,8 @@ ShapeInference::InferScalarBroadcastShape(absl::Span shapes) { const Shape& shape, int64_t split_dimension, int64_t concat_dimension, int64_t split_count) { TF_RET_CHECK(split_count > 0); + TF_RET_CHECK(!shape.is_bounded_dynamic()) + << "AllToAll does not support bounded dynamic shapes"; if (split_dimension >= shape.rank() || split_dimension < 0) { return InvalidArgument( "AllToAll split_dimension %d is out-of-bounds in shape %s.", @@ -2502,25 +2504,41 @@ ShapeInference::InferScalarBroadcastShape(absl::Span shapes) { "AllToAll concat_dimension %d is out-of-bounds in shape %s.", concat_dimension, ShapeUtil::HumanString(shape)); } - if (shape.dimensions(split_dimension) % split_count != 0) { + int64_t split_dimension_size = shape.dimensions(split_dimension); + if (!IsUnboundedDynamicSize(split_dimension_size) && + split_dimension_size % split_count != 0) { return InvalidArgument( "AllToAll split dimension size %d must be dividable by split_count " "%d.", - shape.dimensions(split_dimension), split_count); + split_dimension_size, split_count); } std::vector new_dimensions(shape.dimensions().begin(), shape.dimensions().end()); - new_dimensions[split_dimension] /= split_count; - new_dimensions[concat_dimension] *= split_count; - return ShapeUtil::MakeShape(shape.element_type(), new_dimensions); + new_dimensions[split_dimension] = + IsUnboundedDynamicSize(new_dimensions[split_dimension]) + ? Shape::kUnboundedSize + : new_dimensions[split_dimension] / split_count; + new_dimensions[concat_dimension] = + IsUnboundedDynamicSize(new_dimensions[concat_dimension]) + ? Shape::kUnboundedSize + : new_dimensions[concat_dimension] * split_count; + + const std::vector dynamic_dimensions(shape.dynamic_dimensions().begin(), + shape.dynamic_dimensions().end()); + return ShapeUtil::MakeShape(shape.element_type(), new_dimensions, + dynamic_dimensions); } /* static */ absl::StatusOr ShapeInference::InferAllToAllTupleShape( absl::Span operand_shapes) { - // An Alltoall HLO instruction receives N operands (with the same shape) and + // An AllToAll HLO instruction receives N operands (with the same shape) and // returns a tuple that contains N array shapes. 
TF_RET_CHECK(!operand_shapes.empty()); for (int i = 0; i < operand_shapes.size(); i++) { + if (operand_shapes[i]->is_unbounded_dynamic()) { + return InvalidArgument( + "AllToAllTuple does not support unbounded dynamic shapes"); + } if (!Shape::Equal().IgnoreMemorySpaceInLayout()(*operand_shapes[0], *operand_shapes[i])) { return InvalidArgument( diff --git a/third_party/xla/xla/service/shape_inference_test.cc b/third_party/xla/xla/service/shape_inference_test.cc index 3afa14fa302eb8..2ec1968a94b7c2 100644 --- a/third_party/xla/xla/service/shape_inference_test.cc +++ b/third_party/xla/xla/service/shape_inference_test.cc @@ -4056,6 +4056,32 @@ TEST_F(ShapeInferenceTest, UnboundedAllReduce) { << " expected: " << ShapeUtil::HumanString(expected); } +TEST_F(ShapeInferenceTest, UnboundedAllToAll) { + TF_ASSERT_OK_AND_ASSIGN(const Shape operand, ParseShape("f32[?, 10]")); + TF_ASSERT_OK_AND_ASSIGN(const Shape expected, ParseShape("f32[?, 10]")); + TF_ASSERT_OK_AND_ASSIGN( + const Shape inferred_shape, + ShapeInference::InferAllToAllShape(/*shape=*/operand, + /*split_dimension=*/0, + /*concat_dimension=*/0, + /*split_count=*/3)); + EXPECT_TRUE(ShapeUtil::Equal(inferred_shape, expected)) + << "inferred: " << ShapeUtil::HumanString(inferred_shape) + << " expected: " << ShapeUtil::HumanString(expected); +} + +TEST_F(ShapeInferenceTest, UnboundedAllToAllTupleUnsupported) { + TF_ASSERT_OK_AND_ASSIGN(const Shape operand, ParseShape("f32[?, 10]")); + TF_ASSERT_OK_AND_ASSIGN(const Shape expected, + ParseShape("(f32[?, 10], f32[?, 10])")); + const absl::StatusOr inferred_shape = + ShapeInference::InferAllToAllTupleShape( + /*operand_shapes=*/{&operand, &operand}); + EXPECT_THAT( + inferred_shape.status().message(), + HasSubstr("AllToAllTuple does not support unbounded dynamic shapes")); +} + TEST_P(UnboundedLogicalOpShapeInferenceTest, UnboundedAnd) { TF_ASSERT_OK_AND_ASSIGN(const Shape lhs, ParseShape(GetParam().lhs)); TF_ASSERT_OK_AND_ASSIGN(const Shape rhs, ParseShape(GetParam().rhs)); From ec95a1e4bb77931881f2881f4846ec73212e2c4e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 May 2024 12:37:00 -0700 Subject: [PATCH 131/478] Create basic server coverage and model tests. PiperOrigin-RevId: 633672834 --- tensorflow/compiler/mlir/quantization/stablehlo/python/BUILD | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/python/BUILD b/tensorflow/compiler/mlir/quantization/stablehlo/python/BUILD index a4bdf7da09f8ee..0999d37da524c2 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/python/BUILD +++ b/tensorflow/compiler/mlir/quantization/stablehlo/python/BUILD @@ -30,6 +30,7 @@ package( pytype_strict_library( name = "quantization", srcs = ["quantization.py"], + visibility = ["//visibility:public"], deps = [ ":pywrap_quantization", "//tensorflow/compiler/mlir/quantization/stablehlo:quantization_config_proto_py", @@ -45,6 +46,10 @@ pytype_strict_library( # testonly = 1, # srcs = ["integration_test/quantize_model_test_base.py"], # tags = ["no_pip"], +# visibility = [ +# "//learning/brain/mlir/quantization/stablehlo:__subpackages__", +# "//tensorflow/compiler/mlir/quantization:__subpackages__", +# ], # deps = [ # "//third_party/py/mlir:ir", # "//third_party/py/mlir:stablehlo_dialect", From e621b7eb3ce7a806c5427ad68acf0b880871286f Mon Sep 17 00:00:00 2001 From: "Dimitar (Mitko) Asenov" Date: Tue, 14 May 2024 12:52:34 -0700 Subject: [PATCH 132/478] [XLA] Fix incorrect comment. 
PiperOrigin-RevId: 633677008 --- third_party/xla/xla/xla.proto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/xla/xla/xla.proto b/third_party/xla/xla/xla.proto index 4e0abf59b50906..391ddd053ffcad 100644 --- a/third_party/xla/xla/xla.proto +++ b/third_party/xla/xla/xla.proto @@ -519,7 +519,7 @@ message DebugOptions { // scratch), so this can be multiplied by quite a lot. int64 xla_gpu_redzone_padding_bytes = 228; - reserved 168; // Was xla_allow_excess_precision. + reserved 168; // Was xla_gpu_simplify_all_fp_conversions. // An experimental option to force all layouts present in the // after-optimizations HLO to be descending, e.g. From 39db3ef2065adeaf6ab48fd400a1bd4817717725 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 May 2024 13:24:17 -0700 Subject: [PATCH 133/478] Update ops-related pbtxt files. PiperOrigin-RevId: 633686615 --- tensorflow/core/ops/ops.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index c0a6aa926c51e0..4ac4c1beb21194 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -1,4 +1,4 @@ -go/debugproto +go/nodeserialize op { name: "Abort" attr { From a1750554dc91ccf3c1585dbc90bcb1b2856855b7 Mon Sep 17 00:00:00 2001 From: Ilia Sergachev Date: Tue, 14 May 2024 13:33:54 -0700 Subject: [PATCH 134/478] PR #12465: [GPU] Fix handling of xla_gpu_require_complete_aot_autotune_results flag. Imported from GitHub PR https://github.com/openxla/xla/pull/12465 Copybara import of the project: -- 4a050dce154b924b98134294f3f15ffce35bdcbb by Ilia Sergachev : [GPU] Fix handling of xla_gpu_require_complete_aot_autotune_results flag. Merging this change closes #12465 PiperOrigin-RevId: 633690035 --- third_party/xla/xla/debug_options_flags.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/third_party/xla/xla/debug_options_flags.cc b/third_party/xla/xla/debug_options_flags.cc index aef3c9ef09de2c..8b782dbbe5dc39 100644 --- a/third_party/xla/xla/debug_options_flags.cc +++ b/third_party/xla/xla/debug_options_flags.cc @@ -249,6 +249,8 @@ DebugOptions DefaultDebugOptionsIgnoringFlags() { opts.set_xla_reduce_window_rewrite_base_length(32); + opts.set_xla_gpu_require_complete_aot_autotune_results(false); + return opts; } @@ -1427,7 +1429,7 @@ void MakeDebugOptionsFlags(std::vector* flag_list, "xla_gpu_require_complete_aot_autotune_results", bool_setter_for( &DebugOptions::set_xla_gpu_require_complete_aot_autotune_results), - debug_options->xla_gpu_multi_streamed_windowed_einsum(), + debug_options->xla_gpu_require_complete_aot_autotune_results(), "Whether to require complete AOT autotuning results.")); flag_list->push_back(tsl::Flag( "xla_gpu_auto_spmd_partitioning_memory_budget_gb", From 9bd24f556207dec3e7253c59e764c6a71d16fe4d Mon Sep 17 00:00:00 2001 From: David Dunleavy Date: Tue, 14 May 2024 14:07:59 -0700 Subject: [PATCH 135/478] Remove `gpu_any` tag in favor of `requires-gpu-nvidia` This also enables v100 tests. We now use the same sets of tags for gpu tests between internal and external, modulo the extra tags for local execution of gpu tests. 
PiperOrigin-RevId: 633700962 --- third_party/xla/.kokoro/linux/build.sh | 2 +- .../tsl/tsl/platform/default/build_config_root.bzl | 2 +- third_party/xla/xla/service/gpu/BUILD | 1 + third_party/xla/xla/service/gpu/tests/BUILD | 2 +- third_party/xla/xla/tools/multihost_hlo_runner/BUILD | 7 +++---- third_party/xla/xla/xla.bzl | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/third_party/xla/.kokoro/linux/build.sh b/third_party/xla/.kokoro/linux/build.sh index 81251bd3f44a6c..f9610594e5646d 100644 --- a/third_party/xla/.kokoro/linux/build.sh +++ b/third_party/xla/.kokoro/linux/build.sh @@ -54,7 +54,7 @@ RBE_FLAGS="" TARGET_FILTERS="-@local_tsl//tsl/platform:subprocess_test -@local_tsl//tsl/platform/cloud:google_auth_provider_test -@local_tsl//tsl/platform/cloud:oauth_client_test" if is_linux_gpu_job ; then - TAGS_FILTER="$TAGS_FILTER,gpu_any,requires-gpu-nvidia,-no_gpu" + TAGS_FILTER="$TAGS_FILTER,requires-gpu-nvidia,-no_gpu" # We are currently running XLA presubmits on machines with NVIDIA T4 GPUs, # which have a compute compatibility of 7.5. Se we filter out all the tests diff --git a/third_party/xla/third_party/tsl/tsl/platform/default/build_config_root.bzl b/third_party/xla/third_party/tsl/tsl/platform/default/build_config_root.bzl index a900565143027d..142641b16d2fa3 100644 --- a/third_party/xla/third_party/tsl/tsl/platform/default/build_config_root.bzl +++ b/third_party/xla/third_party/tsl/tsl/platform/default/build_config_root.bzl @@ -12,7 +12,7 @@ GPU_TEST_PROPERTIES = { } def tf_gpu_tests_tags(): - return ["gpu"] + gpu_test_tags() + return ["requires-gpu-nvidia", "gpu"] + gpu_test_tags() # terminology changes: saving tf_cuda_* for compatibility def tf_cuda_tests_tags(): diff --git a/third_party/xla/xla/service/gpu/BUILD b/third_party/xla/xla/service/gpu/BUILD index 8b841bcf9be6ea..3d6c90b8119d96 100644 --- a/third_party/xla/xla/service/gpu/BUILD +++ b/third_party/xla/xla/service/gpu/BUILD @@ -5648,6 +5648,7 @@ xla_test( "gpu_a100", ], tags = [ + "no_oss", # Needs fix for `ConvertGenerator` "nomac", ], deps = [ diff --git a/third_party/xla/xla/service/gpu/tests/BUILD b/third_party/xla/xla/service/gpu/tests/BUILD index 1c3dfbf7311549..257e3ddf87973f 100644 --- a/third_party/xla/xla/service/gpu/tests/BUILD +++ b/third_party/xla/xla/service/gpu/tests/BUILD @@ -820,7 +820,7 @@ lit_test_suite( "//xla/tools/hlo_opt:gpu_specs/p100.txtpb", "//xla/tools/hlo_opt:gpu_specs/v100.txtpb", ], - default_tags = tf_cuda_tests_tags() + ["gpu_any"], + default_tags = tf_cuda_tests_tags(), tags_override = { "element_wise_row_vectorization.hlo": ["no_rocm"], "scatter_bf16.hlo": ["no_rocm"], diff --git a/third_party/xla/xla/tools/multihost_hlo_runner/BUILD b/third_party/xla/xla/tools/multihost_hlo_runner/BUILD index eb9b29fa9a3364..8014fbe8ed3f0d 100644 --- a/third_party/xla/xla/tools/multihost_hlo_runner/BUILD +++ b/third_party/xla/xla/tools/multihost_hlo_runner/BUILD @@ -1,6 +1,7 @@ load("@bazel_skylib//rules:build_test.bzl", "build_test") load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") load("@local_config_rocm//rocm:build_defs.bzl", "if_rocm") +load("@local_tsl//tsl/platform:build_config_root.bzl", "tf_gpu_tests_tags") load("@local_tsl//tsl/platform:rules_cc.bzl", "cc_library") load("//xla:xla.bzl", "xla_cc_binary") load("//xla/tests:build_defs.bzl", "xla_test") @@ -16,7 +17,7 @@ build_test( name = "hlo_runner_main_build_test", tags = [ "cpu", - "gpu_any", + "gpu", # TODO(ddunleavy): this configuration of tags will only build on GPU in OSS. 
], targets = [ ":hlo_runner_main", @@ -28,11 +29,9 @@ xla_cc_binary( testonly = True, srcs = ["hlo_runner_main.cc"], tags = [ - "gpu", - "gpu_any", "noasan", # Exceeds linker limit. "nomac", - ], + ] + tf_gpu_tests_tags(), deps = [ ":functional_hlo_runner", ":hlo_runner_flags", diff --git a/third_party/xla/xla/xla.bzl b/third_party/xla/xla/xla.bzl index 93508a19769711..2143baaf6b5d41 100644 --- a/third_party/xla/xla/xla.bzl +++ b/third_party/xla/xla/xla.bzl @@ -73,7 +73,7 @@ def xla_cc_binary(deps = [], copts = tsl_copts(), **kwargs): def xla_cc_test(name, deps = [], use_gpu = False, **kwargs): # Need to do it this way so that `tf_exec_properties` can read tags. _tags = kwargs.get("tags", []) - kwargs["tags"] = _tags + tf_gpu_tests_tags() + ["gpu_any"] if use_gpu else _tags + kwargs["tags"] = _tags + tf_gpu_tests_tags() if use_gpu else _tags native.cc_test( name = name, From a6b2dfb52b593d8e765aa1fcc664c802e112771a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 May 2024 14:18:46 -0700 Subject: [PATCH 136/478] Fix build error for third_party/tensorflow/core/tfrt/gpu. PiperOrigin-RevId: 633704451 --- tensorflow/core/tfrt/gpu/kernel/BUILD | 1 + tensorflow/core/tfrt/gpu/kernel/gpu_runner_test.cc | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/tfrt/gpu/kernel/BUILD b/tensorflow/core/tfrt/gpu/kernel/BUILD index 4c9046644c93c2..2b79330c050d82 100644 --- a/tensorflow/core/tfrt/gpu/kernel/BUILD +++ b/tensorflow/core/tfrt/gpu/kernel/BUILD @@ -102,6 +102,7 @@ tf_cuda_cc_test( "//tensorflow/core/tfrt/common:pjrt_util", "//tensorflow/core/tfrt/fallback:fallback_state", "@com_google_googletest//:gtest_main", + "@local_tsl//tsl/framework:serving_device_selector_policies", "@tf_runtime//:hostcontext", "@tf_runtime//:tensor", ], diff --git a/tensorflow/core/tfrt/gpu/kernel/gpu_runner_test.cc b/tensorflow/core/tfrt/gpu/kernel/gpu_runner_test.cc index e2b999cb23ec06..7371f62df1f20b 100644 --- a/tensorflow/core/tfrt/gpu/kernel/gpu_runner_test.cc +++ b/tensorflow/core/tfrt/gpu/kernel/gpu_runner_test.cc @@ -15,6 +15,8 @@ limitations under the License. #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #include "tensorflow/core/tfrt/gpu/kernel/gpu_runner.h" +#include + #include #include #include "tensorflow/cc/framework/scope.h" @@ -145,7 +147,7 @@ class GpuRunnerTest : public ::testing::Test { exec_ctx_ = std::make_unique(std::move(*req_ctx)); // Create a gpu runner. - auto policy = std::make_unique(); + auto policy = std::make_unique(); serving_device_selector_ = std::make_unique( kNumVirtualGpuDevices, std::move(policy)); gpu_runner_ = std::make_unique(serving_device_selector_.get()); From edfcdee26c09216b3ef5e6fadbeb7828f3c7bcc2 Mon Sep 17 00:00:00 2001 From: Kuy Mainwaring Date: Tue, 14 May 2024 14:29:23 -0700 Subject: [PATCH 137/478] [XLA:GPU] Clang-tidy fixes for xla/service/gpu/runtime/sequential_thunk.cc PiperOrigin-RevId: 633708120 --- third_party/xla/xla/service/gpu/runtime/sequential_thunk.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/third_party/xla/xla/service/gpu/runtime/sequential_thunk.cc b/third_party/xla/xla/service/gpu/runtime/sequential_thunk.cc index 143ad94a29071d..c58f31b5207a7a 100644 --- a/third_party/xla/xla/service/gpu/runtime/sequential_thunk.cc +++ b/third_party/xla/xla/service/gpu/runtime/sequential_thunk.cc @@ -28,8 +28,6 @@ limitations under the License. 
namespace xla { namespace gpu { -using ::tsl::profiler::ScopedAnnotation; - SequentialThunk::SequentialThunk(ThunkInfo thunk_info, ThunkSequence thunks) : Thunk(Kind::kSequential, thunk_info), thunks_(std::move(thunks)) {} From f8e34052d7c6a44cb6fdb593f1d16e4d6db30610 Mon Sep 17 00:00:00 2001 From: Bryan Massoth Date: Tue, 14 May 2024 15:00:17 -0700 Subject: [PATCH 138/478] Fix NPE bug in XContextStatsAccessor::GetStat() when stats_metadata_ is nullptr. Refactor preprocess_xplane to enforce nullability as part of type. PiperOrigin-RevId: 633717014 --- .../third_party/tsl/tsl/profiler/utils/BUILD | 1 + .../tsl/profiler/utils/preprocess_xplane.cc | 18 +- .../tsl/profiler/utils/preprocess_xplane.h | 218 +++++++++--------- .../profiler/utils/preprocess_xplane_test.cc | 15 ++ 4 files changed, 137 insertions(+), 115 deletions(-) diff --git a/third_party/xla/third_party/tsl/tsl/profiler/utils/BUILD b/third_party/xla/third_party/tsl/tsl/profiler/utils/BUILD index f0145697048df9..26358c8ec80db5 100644 --- a/third_party/xla/third_party/tsl/tsl/profiler/utils/BUILD +++ b/third_party/xla/third_party/tsl/tsl/profiler/utils/BUILD @@ -446,6 +446,7 @@ tsl_cc_test( "//tsl/platform:test", "//tsl/platform:test_main", "//tsl/profiler/lib:connected_traceme", + "//tsl/profiler/protobuf:xplane_proto_cc", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/hash", ], diff --git a/third_party/xla/third_party/tsl/tsl/profiler/utils/preprocess_xplane.cc b/third_party/xla/third_party/tsl/tsl/profiler/utils/preprocess_xplane.cc index 7e8e7f3a431c97..3d06a05609a118 100644 --- a/third_party/xla/third_party/tsl/tsl/profiler/utils/preprocess_xplane.cc +++ b/third_party/xla/third_party/tsl/tsl/profiler/utils/preprocess_xplane.cc @@ -38,16 +38,16 @@ using ::tsl::profiler::XPlane; using ::tsl::profiler::XPlaneBuilder; using ::tsl::profiler::XSpace; -void MutateXPlane(XPlane* plane, +void MutateXPlane(XPlane& plane, const std::vector>& mutator_factories) { - XPlaneBuilder plane_builder(plane); + XPlaneBuilder plane_builder(&plane); absl::flat_hash_map>> mutators_from_event_metadata_id; std::vector> line_mutators; for (const auto& mutator_factory : mutator_factories) { - auto mutators = mutator_factory->CreateMutators(&plane_builder); + auto mutators = mutator_factory->CreateMutators(plane_builder); for (auto& mutator : mutators) { if (mutator->event_metadata()) { auto id = mutator->event_metadata()->id(); @@ -63,7 +63,7 @@ void MutateXPlane(XPlane* plane, plane_builder.ForEachLine([&](XLineBuilder line_builder) { for (const auto& mutator : line_mutators) { - mutator->MutateEventsInLine(&line_builder); + mutator->MutateEventsInLine(line_builder); } if (mutators_from_event_metadata_id.empty()) return; line_builder.ForEachEvent([&](XEventBuilder event_builder) { @@ -71,7 +71,7 @@ void MutateXPlane(XPlane* plane, mutators_from_event_metadata_id.find(event_builder.MetadataId()); if (event_mutators != mutators_from_event_metadata_id.end()) { for (const auto& mutator : event_mutators->second) { - mutator->Mutate(&event_builder); + mutator->Mutate(event_builder); } } }); @@ -150,14 +150,18 @@ CreateMutatorFactories() { } // namespace void PreprocessXPlane(XPlane* plane) { + if (plane == nullptr) return; + auto mutator_factories = CreateMutatorFactories(); - MutateXPlane(plane, mutator_factories); + MutateXPlane(*plane, mutator_factories); } void PreprocessXSpace(XSpace* space) { + if (space == nullptr) return; + auto mutator_factories = CreateMutatorFactories(); for (XPlane& plane : *space->mutable_planes()) { - 
MutateXPlane(&plane, mutator_factories); + MutateXPlane(plane, mutator_factories); } } diff --git a/third_party/xla/third_party/tsl/tsl/profiler/utils/preprocess_xplane.h b/third_party/xla/third_party/tsl/tsl/profiler/utils/preprocess_xplane.h index 2433cd825cc842..724abb30429968 100644 --- a/third_party/xla/third_party/tsl/tsl/profiler/utils/preprocess_xplane.h +++ b/third_party/xla/third_party/tsl/tsl/profiler/utils/preprocess_xplane.h @@ -52,9 +52,9 @@ class XplaneEventMutator { virtual ~XplaneEventMutator() = default; // Mutate event by event specified by the event_metadata. - virtual void Mutate(XEventBuilder* builder) = 0; + virtual void Mutate(XEventBuilder& builder) = 0; // Mutate line by line if event_metadata() return nullptr. - virtual void MutateEventsInLine(XLineBuilder* line) = 0; + virtual void MutateEventsInLine(XLineBuilder& line) = 0; const XEventMetadata* event_metadata() const { return event_metadata_; } @@ -70,7 +70,7 @@ class XplaneEventMutatorFactory { virtual ~XplaneEventMutatorFactory() = default; virtual std::vector> CreateMutators( - XPlaneBuilder* xplane) const = 0; + XPlaneBuilder& xplane) const = 0; protected: XplaneEventMutatorFactory() = default; @@ -84,21 +84,20 @@ class XplaneRootEventMutatorFactory : public XplaneEventMutatorFactory { public: static std::unique_ptr CreateFactory( HostEventType event_type, int64_t root_level) { - std::unique_ptr base; - base.reset(new XplaneRootEventMutatorFactory(event_type, root_level)); - return base; + return absl::WrapUnique( + new XplaneRootEventMutatorFactory(event_type, root_level)); } std::vector> CreateMutators( - XPlaneBuilder* xplane) const override { + XPlaneBuilder& xplane) const override { std::vector> mutators; - XEventMetadata* event_metadata = - xplane->GetEventMetadata(GetHostEventTypeStr(event_type_)); - if (event_metadata == nullptr) return {}; - XStatMetadata* root_metadata = - xplane->GetOrCreateStatMetadata(GetStatTypeStr(StatType::kIsRoot)); - mutators.emplace_back(std::make_unique( - event_metadata, root_metadata, root_level_)); + if (auto* event_metadata = + xplane.GetEventMetadata(GetHostEventTypeStr(event_type_))) { + XStatMetadata* root_metadata = + xplane.GetOrCreateStatMetadata(GetStatTypeStr(StatType::kIsRoot)); + mutators.emplace_back(std::make_unique( + event_metadata, *root_metadata, root_level_)); + } return mutators; } @@ -110,20 +109,20 @@ class XplaneRootEventMutatorFactory : public XplaneEventMutatorFactory { class XplaneRootEventMutator : public XplaneEventMutator { public: XplaneRootEventMutator(XEventMetadata* event_metadata, - XStatMetadata* root_stats_metadata, + XStatMetadata& root_stats_metadata, int64_t root_level) : XplaneEventMutator(event_metadata), root_stats_metadata_(root_stats_metadata), root_level_(root_level) {} - void Mutate(XEventBuilder* event_builder) override { - event_builder->SetOrAddStatValue(*root_stats_metadata_, root_level_); + void Mutate(XEventBuilder& event_builder) override { + event_builder.SetOrAddStatValue(root_stats_metadata_, root_level_); } - void MutateEventsInLine(XLineBuilder* line) override { + void MutateEventsInLine(XLineBuilder& line) override { CHECK(false); // Crash OK } private: - XStatMetadata* root_stats_metadata_; + XStatMetadata& root_stats_metadata_; int64_t root_level_; }; @@ -136,18 +135,19 @@ class XContextStatsAccessor { public: using value_type = StatValueType; - bool Initialize(XPlaneBuilder* xplane) { - stats_metadata_ = xplane->GetStatMetadata(GetStatTypeStr(kStatId)); + bool Initialize(XPlaneBuilder& xplane) { + 
stats_metadata_ = xplane.GetStatMetadata(GetStatTypeStr(kStatId)); return stats_metadata_; } - std::optional GetStat(XEventBuilder* event_builder) { - auto* stat = event_builder->GetStat(*stats_metadata_); + std::optional GetStat(XEventBuilder& event_builder) { + if (stats_metadata_ == nullptr) return std::nullopt; + auto* stat = event_builder.GetStat(*stats_metadata_); if (stat == nullptr) return std::nullopt; if constexpr (std::is_integral_v) { - return event_builder->IntOrUintValue(*stat); + return event_builder.IntOrUintValue(*stat); } else { - return event_builder->StrOrRefValue(*stat); + return event_builder.StrOrRefValue(*stat); } } @@ -160,19 +160,19 @@ class XContextStatsAccessorWithDefault { public: using value_type = StatValueType; - bool Initialize(XPlaneBuilder* xplane) { - stats_metadata_ = xplane->GetStatMetadata(GetStatTypeStr(kStatId)); + bool Initialize(XPlaneBuilder& xplane) { + stats_metadata_ = xplane.GetStatMetadata(GetStatTypeStr(kStatId)); return true; // Always return true, even stat_metadata doesn't exist. } - std::optional GetStat(XEventBuilder* event_builder) { + std::optional GetStat(XEventBuilder& event_builder) { if (stats_metadata_ == nullptr) return kDefaultValue; - auto* stat = event_builder->GetStat(*stats_metadata_); + auto* stat = event_builder.GetStat(*stats_metadata_); if (stat == nullptr) return kDefaultValue; if constexpr (std::is_integral_v) { - return event_builder->IntOrUintValue(*stat); + return event_builder.IntOrUintValue(*stat); } else { - return event_builder->StrOrRefValue(*stat); + return event_builder.StrOrRefValue(*stat); } } @@ -222,40 +222,40 @@ class XplaneConnectedEventMutatorFactory : public XplaneEventMutatorFactory { using StatsAccessors = std::tuple; std::vector> CreateMutators( - XPlaneBuilder* xplane) const override { + XPlaneBuilder& xplane) const override { // Check if all stats exist in current plane. 
StatsAccessors stats_accessors; bool all_required_stats_exist = true; auto check_stats_meta = [&all_required_stats_exist, - xplane](auto&& accessor) { - if (all_required_stats_exist == false) return; - if (!accessor.Initialize(xplane)) all_required_stats_exist = false; + &xplane](auto&& accessor) { + all_required_stats_exist = + all_required_stats_exist && accessor.Initialize(xplane); }; for_each(stats_accessors, check_stats_meta); if (!all_required_stats_exist) return {}; XEventMetadata* producer_event_metadata = - xplane->GetEventMetadata(GetHostEventTypeStr(producer_event)); + xplane.GetEventMetadata(GetHostEventTypeStr(producer_event)); XEventMetadata* consumer_event_metadata = - xplane->GetEventMetadata(GetHostEventTypeStr(consumer_event)); + xplane.GetEventMetadata(GetHostEventTypeStr(consumer_event)); std::vector> mutators; if (producer_event_metadata) { - XStatMetadata* context_type_metadata = xplane->GetOrCreateStatMetadata( + XStatMetadata* context_type_metadata = xplane.GetOrCreateStatMetadata( GetStatTypeStr(StatType::kProducerType)); - XStatMetadata* context_id_metadata = xplane->GetOrCreateStatMetadata( - GetStatTypeStr(StatType::kProducerId)); + XStatMetadata* context_id_metadata = + xplane.GetOrCreateStatMetadata(GetStatTypeStr(StatType::kProducerId)); mutators.emplace_back(std::make_unique( - producer_event_metadata, context_type_metadata, context_id_metadata, + producer_event_metadata, *context_type_metadata, *context_id_metadata, stats_accessors)); } if (consumer_event_metadata) { - XStatMetadata* context_type_metadata = xplane->GetOrCreateStatMetadata( + XStatMetadata* context_type_metadata = xplane.GetOrCreateStatMetadata( GetStatTypeStr(StatType::kConsumerType)); - XStatMetadata* context_id_metadata = xplane->GetOrCreateStatMetadata( - GetStatTypeStr(StatType::kConsumerId)); + XStatMetadata* context_id_metadata = + xplane.GetOrCreateStatMetadata(GetStatTypeStr(StatType::kConsumerId)); mutators.emplace_back(std::make_unique( - consumer_event_metadata, context_type_metadata, context_id_metadata, + consumer_event_metadata, *context_type_metadata, *context_id_metadata, stats_accessors)); } return mutators; @@ -267,19 +267,19 @@ class XplaneConnectedEventMutatorFactory : public XplaneEventMutatorFactory { class XplaneConnectedEventMutator : public XplaneEventMutator { public: XplaneConnectedEventMutator(XEventMetadata* event_metadata, - XStatMetadata* context_type_metadata, - XStatMetadata* context_id_metadata, + XStatMetadata& context_type_metadata, + XStatMetadata& context_id_metadata, const StatsAccessors& accessors) : XplaneEventMutator(event_metadata), context_type_metadata_(context_type_metadata), context_id_metadata_(context_id_metadata), accessors_(accessors) {} - void Mutate(XEventBuilder* event_builder) override { + void Mutate(XEventBuilder& event_builder) override { bool all_required_stats_exist = true; std::vector> required_stats; auto check_stats_meta = [&all_required_stats_exist, &required_stats, - event_builder](auto&& accessor) { + &event_builder](auto&& accessor) { if (all_required_stats_exist == false) return; auto stats_data = accessor.GetStat(event_builder); if (!stats_data) { @@ -299,18 +299,18 @@ class XplaneConnectedEventMutatorFactory : public XplaneEventMutatorFactory { context_id = absl::HashOf(producer_event, consumer_event, required_stats); } - event_builder->SetOrAddStatValue(*context_type_metadata_, - static_cast(context_type)); - event_builder->SetOrAddStatValue(*context_id_metadata_, context_id); + 
event_builder.SetOrAddStatValue(context_type_metadata_, + static_cast(context_type)); + event_builder.SetOrAddStatValue(context_id_metadata_, context_id); } - void MutateEventsInLine(XLineBuilder* line) override { + void MutateEventsInLine(XLineBuilder& line) override { CHECK(false); // Crash OK } private: - XStatMetadata* context_type_metadata_; - XStatMetadata* context_id_metadata_; + XStatMetadata& context_type_metadata_; + XStatMetadata& context_id_metadata_; StatsAccessors accessors_; }; }; @@ -323,17 +323,18 @@ class HostRunIdMutatorFactory : public XplaneEventMutatorFactory { } std::vector> CreateMutators( - XPlaneBuilder* xplane) const override { + XPlaneBuilder& xplane) const override { std::vector> mutators; - XEventMetadata* event_metadata = - xplane->GetEventMetadata(GetHostEventTypeStr(event_type)); - if (event_metadata == nullptr) return {}; - XContextStatsAccessor run_id_stats_accessor; - run_id_stats_accessor.Initialize(xplane); - XStatMetadata* run_id_metadata = - xplane->GetOrCreateStatMetadata(GetStatTypeStr(StatType::kRunId)); - mutators.emplace_back(std::make_unique( - event_metadata, run_id_stats_accessor, run_id_metadata)); + if (auto* event_metadata = + xplane.GetEventMetadata(GetHostEventTypeStr(event_type))) { + XContextStatsAccessor run_id_stats_accessor; + if (run_id_stats_accessor.Initialize(xplane)) { + XStatMetadata* run_id_metadata = + xplane.GetOrCreateStatMetadata(GetStatTypeStr(StatType::kRunId)); + mutators.emplace_back(std::make_unique( + event_metadata, run_id_stats_accessor, *run_id_metadata)); + } + } return mutators; } @@ -344,25 +345,25 @@ class HostRunIdMutatorFactory : public XplaneEventMutatorFactory { HostRunIdMutator( XEventMetadata* event_metadata, XContextStatsAccessor run_id_stats_accessor, - XStatMetadata* run_id_metadata) + XStatMetadata& run_id_metadata) : XplaneEventMutator(event_metadata), run_id_stats_accessor_(run_id_stats_accessor), run_id_metadata_(run_id_metadata) {} - void Mutate(XEventBuilder* event_builder) override { + void Mutate(XEventBuilder& event_builder) override { auto run_id = run_id_stats_accessor_.GetStat(event_builder); if (!run_id) return; int64_t fixed_run_id = ((uint64_t)run_id.value() & kRunIdMask); - event_builder->SetOrAddStatValue(*run_id_metadata_, fixed_run_id); + event_builder.SetOrAddStatValue(run_id_metadata_, fixed_run_id); } - void MutateEventsInLine(XLineBuilder* line) override { + void MutateEventsInLine(XLineBuilder& line) override { CHECK(false); // Crash OK } private: XContextStatsAccessor run_id_stats_accessor_; - XStatMetadata* run_id_metadata_; + XStatMetadata& run_id_metadata_; }; }; @@ -377,27 +378,28 @@ class TpuModuleLineMutatorFactory : public XplaneEventMutatorFactory { } std::vector> CreateMutators( - XPlaneBuilder* xplane) const override { + XPlaneBuilder& xplane) const override { std::vector> mutators; - if (absl::StartsWith(xplane->Name(), kTpuPlanePrefix) && - GetTensorCoreId(xplane->Name()).has_value()) { - if (auto device_ordinal = ParseDeviceOrdinal(xplane->Name())) { - XStatMetadata* context_type_metadata = xplane->GetOrCreateStatMetadata( + if (absl::StartsWith(xplane.Name(), kTpuPlanePrefix) && + GetTensorCoreId(xplane.Name()).has_value()) { + if (auto device_ordinal = ParseDeviceOrdinal(xplane.Name())) { + XStatMetadata* context_type_metadata = xplane.GetOrCreateStatMetadata( GetStatTypeStr(StatType::kConsumerType)); - XStatMetadata* context_id_metadata = xplane->GetOrCreateStatMetadata( + XStatMetadata* context_id_metadata = xplane.GetOrCreateStatMetadata( 
GetStatTypeStr(StatType::kConsumerId)); XContextStatsAccessor queue_id_stats_accessor; XContextStatsAccessor run_id_stats_accessor; XContextStatsAccessorWithDefault core_type_stats_accessor; - queue_id_stats_accessor.Initialize(xplane); - run_id_stats_accessor.Initialize(xplane); - core_type_stats_accessor.Initialize(xplane); - mutators.emplace_back(std::make_unique( - *device_ordinal, context_type_metadata, context_id_metadata, - queue_id_stats_accessor, run_id_stats_accessor, - core_type_stats_accessor)); + if (queue_id_stats_accessor.Initialize(xplane) && + run_id_stats_accessor.Initialize(xplane) && + core_type_stats_accessor.Initialize(xplane)) { + mutators.emplace_back(std::make_unique( + *device_ordinal, *context_type_metadata, *context_id_metadata, + queue_id_stats_accessor, run_id_stats_accessor, + core_type_stats_accessor)); + } } } return mutators; @@ -409,8 +411,8 @@ class TpuModuleLineMutatorFactory : public XplaneEventMutatorFactory { class TpuModuleLineMutator : public XplaneEventMutator { public: TpuModuleLineMutator( - uint32_t device_ordinal, XStatMetadata* context_type_metadata, - XStatMetadata* context_id_metadata, + uint32_t device_ordinal, XStatMetadata& context_type_metadata, + XStatMetadata& context_id_metadata, XContextStatsAccessor queue_id_stats_accessor, XContextStatsAccessor run_id_stats_accessor, @@ -424,16 +426,16 @@ class TpuModuleLineMutatorFactory : public XplaneEventMutatorFactory { run_id_stats_accessor_(run_id_stats_accessor), core_type_stats_accessor_(core_type_stats_accessor) {} - void Mutate(XEventBuilder* event_builder) override { + void Mutate(XEventBuilder& event_builder) override { CHECK(false); // Crash OK } - void MutateEventsInLine(XLineBuilder* line) override { - if (line->Name() != kXlaModuleLineName) return; - line->ForEachEvent([&](XEventBuilder event) { - auto run_id = run_id_stats_accessor_.GetStat(&event); - auto queue_id = queue_id_stats_accessor_.GetStat(&event); - auto core_type = core_type_stats_accessor_.GetStat(&event); + void MutateEventsInLine(XLineBuilder& line) override { + if (line.Name() != kXlaModuleLineName) return; + line.ForEachEvent([&](XEventBuilder event) { + auto run_id = run_id_stats_accessor_.GetStat(event); + auto queue_id = queue_id_stats_accessor_.GetStat(event); + auto core_type = core_type_stats_accessor_.GetStat(event); if (!run_id || !queue_id) return; // The order of tuple need to be // consistent with other kTpuLaunch types. 
@@ -444,16 +446,16 @@ class TpuModuleLineMutatorFactory : public XplaneEventMutatorFactory { required_stats.emplace_back(*run_id); required_stats.emplace_back(static_cast(*core_type)); int64_t context_id = absl::HashOf(required_stats); - event.SetOrAddStatValue(*context_type_metadata_, + event.SetOrAddStatValue(context_type_metadata_, static_cast(ContextType::kTpuLaunch)); - event.SetOrAddStatValue(*context_id_metadata_, context_id); + event.SetOrAddStatValue(context_id_metadata_, context_id); }); } private: uint64_t device_ordinal_; - XStatMetadata* context_type_metadata_; - XStatMetadata* context_id_metadata_; + XStatMetadata& context_type_metadata_; + XStatMetadata& context_id_metadata_; XContextStatsAccessor queue_id_stats_accessor_; XContextStatsAccessor run_id_stats_accessor_; @@ -473,7 +475,7 @@ class ThreadpoolLineMutatorFactory : public XplaneEventMutatorFactory { } std::vector> CreateMutators( - XPlaneBuilder* xplane) const override { + XPlaneBuilder& xplane) const override { std::vector> mutators; mutators.emplace_back(std::make_unique(xplane)); return mutators; @@ -484,25 +486,25 @@ class ThreadpoolLineMutatorFactory : public XplaneEventMutatorFactory { class ThreadpoolLineMutator : public XplaneEventMutator { public: - explicit ThreadpoolLineMutator(XPlaneBuilder* xplane) + explicit ThreadpoolLineMutator(XPlaneBuilder& xplane) : XplaneEventMutator(nullptr), xplane_(xplane) { start_region_metadata_ = - xplane_->GetEventMetadata(kThreadpoolListenerStartRegion); + xplane_.GetEventMetadata(kThreadpoolListenerStartRegion); stop_region_metadata_ = - xplane_->GetEventMetadata(kThreadpoolListenerStopRegion); + xplane_.GetEventMetadata(kThreadpoolListenerStopRegion); thread_pool_metadata_ = - xplane_->GetOrCreateEventMetadata(kThreadpoolListenerRegion); - consumer_ = xplane_->GetOrCreateStatMetadata( + xplane_.GetOrCreateEventMetadata(kThreadpoolListenerRegion); + consumer_ = xplane_.GetOrCreateStatMetadata( GetStatTypeStr(StatType::kConsumerId)); - consumer_type_ = xplane_->GetOrCreateStatMetadata( + consumer_type_ = xplane_.GetOrCreateStatMetadata( GetStatTypeStr(StatType::kConsumerType)); } - void Mutate(XEventBuilder* event_builder) override { + void Mutate(XEventBuilder& event_builder) override { CHECK(false); // Crash OK } - void MutateEventsInLine(XLineBuilder* line) override { + void MutateEventsInLine(XLineBuilder& line) override { if (start_region_metadata_ == nullptr || stop_region_metadata_ == nullptr) { // Skip mutations for xplanes that do not have region markers. 
These @@ -519,7 +521,7 @@ class ThreadpoolLineMutatorFactory : public XplaneEventMutatorFactory { }; std::vector event_metadata; - line->ForEachEvent([&](const XEventBuilder& event) { + line.ForEachEvent([&](const XEventBuilder& event) { if (event.MetadataId() == start_region_metadata_->id()) { auto consumer_id = event.GetStat(*consumer_); if (!consumer_id) return; @@ -535,7 +537,7 @@ class ThreadpoolLineMutatorFactory : public XplaneEventMutatorFactory { } }); for (const auto& event_metadata : event_metadata) { - XEventBuilder region = line->AddEvent(*thread_pool_metadata_); + XEventBuilder region = line.AddEvent(*thread_pool_metadata_); region.SetTimestampPs(event_metadata.start_region_timestamp_ps); region.SetEndTimestampPs(event_metadata.end_region_timestamp_ps); region.SetOrAddStatValue(*consumer_, event_metadata.region_id); @@ -548,7 +550,7 @@ class ThreadpoolLineMutatorFactory : public XplaneEventMutatorFactory { private: XStatMetadata* consumer_; XStatMetadata* consumer_type_; - XPlaneBuilder* xplane_; + XPlaneBuilder& xplane_; XEventMetadata* start_region_metadata_; XEventMetadata* stop_region_metadata_; XEventMetadata* thread_pool_metadata_; diff --git a/third_party/xla/third_party/tsl/tsl/profiler/utils/preprocess_xplane_test.cc b/third_party/xla/third_party/tsl/tsl/profiler/utils/preprocess_xplane_test.cc index 5912c00c397853..9712893645090c 100644 --- a/third_party/xla/third_party/tsl/tsl/profiler/utils/preprocess_xplane_test.cc +++ b/third_party/xla/third_party/tsl/tsl/profiler/utils/preprocess_xplane_test.cc @@ -16,12 +16,14 @@ limitations under the License. #include "tsl/profiler/utils/preprocess_xplane.h" #include +#include #include #include "absl/container/flat_hash_map.h" #include "absl/hash/hash.h" #include "tsl/platform/test.h" #include "tsl/profiler/lib/connected_traceme.h" +#include "tsl/profiler/protobuf/xplane.pb.h" #include "tsl/profiler/utils/tf_xplane_visitor.h" #include "tsl/profiler/utils/xplane_builder.h" #include "tsl/profiler/utils/xplane_schema.h" @@ -290,6 +292,19 @@ TEST(PreprocessXPlane, ThreadPoolPreprocessorTest) { EXPECT_TRUE(new_event_added); } +TEST(PreprocessXPlane, XContextStatsAccessorNPETest) { + auto xplane = std::make_unique(); + XPlaneBuilder xplane_builder(xplane.get()); + XLine xline; + XLineBuilder xline_builder(&xline, &xplane_builder); + XEvent xevent; + XEventBuilder xevent_builder(&xline, &xplane_builder, &xevent); + XContextStatsAccessor run_id_accessor; + + ASSERT_FALSE(run_id_accessor.Initialize(xplane_builder)); + EXPECT_EQ(run_id_accessor.GetStat(xevent_builder), std::nullopt); +} + } // namespace } // namespace profiler } // namespace tsl From c4f3a1d9180968265f779513ca4ac2292799cf3a Mon Sep 17 00:00:00 2001 From: Kyle Lucke Date: Tue, 14 May 2024 15:18:47 -0700 Subject: [PATCH 139/478] Remove StreamExecutorInterface::AllocateStream in favor of making CreateStream do the necessary work. 
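For illustration, here is a minimal sketch of the call-site pattern this
change enables (not taken from the patch itself; the wrapper name
MakeReadyStream is hypothetical, and the headers and priority argument
are assumed from the xla/stream_executor tree):

    #include <memory>
    #include <optional>

    #include "absl/status/statusor.h"
    #include "xla/stream_executor/stream.h"
    #include "xla/stream_executor/stream_executor.h"

    namespace se = stream_executor;

    // CreateStream() now performs the platform initialization that
    // AllocateStream()/Stream::Initialize() used to do, so a stream
    // returned here is ready to use and failures surface as a status.
    // MakeReadyStream is a hypothetical helper, not an API from this patch.
    absl::StatusOr<std::unique_ptr<se::Stream>> MakeReadyStream(
        se::StreamExecutor* executor) {
      return executor->CreateStream(/*priority=*/std::nullopt);
    }

Callers that previously checked AllocateStream's bool (or called
Stream::Initialize()) can simply propagate the returned status instead.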
PiperOrigin-RevId: 633722475 --- .../stream_executor/stream_executor.cc | 14 +++---------- .../xla/xla/backends/interpreter/executor.h | 2 -- .../xla/stream_executor/cuda/cuda_executor.cc | 20 +++++++++---------- .../xla/stream_executor/gpu/gpu_executor.h | 2 -- .../xla/xla/stream_executor/gpu/gpu_stream.h | 3 +-- .../xla/stream_executor/host/host_executor.cc | 6 +----- .../xla/stream_executor/host/host_executor.h | 1 - .../stream_executor/mock_stream_executor.h | 1 - .../xla/stream_executor/rocm/rocm_executor.cc | 20 +++++++++---------- third_party/xla/xla/stream_executor/stream.cc | 11 ---------- third_party/xla/xla/stream_executor/stream.h | 5 ----- .../stream_executor_interface.h | 4 ---- .../xla/stream_executor/tpu/tpu_executor.cc | 6 ------ .../xla/stream_executor/tpu/tpu_executor.h | 2 -- .../tpu/tpu_executor_init_fns.inc | 1 - 15 files changed, 25 insertions(+), 73 deletions(-) diff --git a/tensorflow/c/experimental/stream_executor/stream_executor.cc b/tensorflow/c/experimental/stream_executor/stream_executor.cc index c524dd1d27ac89..6e706366d47127 100644 --- a/tensorflow/c/experimental/stream_executor/stream_executor.cc +++ b/tensorflow/c/experimental/stream_executor/stream_executor.cc @@ -438,14 +438,6 @@ class CStreamExecutor : public StreamExecutor { stream_executor_->get_event_status(&device_, event_handle); return SEEventStatusToEventStatus(event_status); } - bool AllocateStream(Stream* stream) override { - DCHECK(stream != nullptr); - absl::Status status = - static_cast(stream->implementation())->Create(); - // TODO(annarev): update AllocateStream to return status instead - // (similar to AllocateEvent). - return status.ok(); - } void DeallocateStream(Stream* stream) override { static_cast(stream->implementation())->Destroy(); } @@ -568,9 +560,9 @@ class CStreamExecutor : public StreamExecutor { absl::StatusOr> CreateStream( std::optional> priority = std::nullopt) override { - auto stream = std::make_unique( - this, std::make_unique(&device_, stream_executor_)); - TF_RETURN_IF_ERROR(stream->Initialize(priority)); + auto c_stream = std::make_unique(&device_, stream_executor_); + TF_RETURN_IF_ERROR(c_stream->Create()); + auto stream = std::make_unique(this, std::move(c_stream)); return std::move(stream); } diff --git a/third_party/xla/xla/backends/interpreter/executor.h b/third_party/xla/xla/backends/interpreter/executor.h index 9f72e99dd0a69d..7ca85d7c22048f 100644 --- a/third_party/xla/xla/backends/interpreter/executor.h +++ b/third_party/xla/xla/backends/interpreter/executor.h @@ -132,7 +132,6 @@ class XlaInterpreterExecutor : public StreamExecutor { return Event::Status::kError; } - bool AllocateStream(Stream *stream) override { return true; } void DeallocateStream(Stream *stream) override {} bool CreateStreamDependency(Stream *dependent, Stream *other) override; @@ -167,7 +166,6 @@ class XlaInterpreterExecutor : public StreamExecutor { std::nullopt) override { auto stream = std::make_unique(this, std::make_unique()); - TF_RETURN_IF_ERROR(stream->Initialize(priority)); return std::move(stream); } diff --git a/third_party/xla/xla/stream_executor/cuda/cuda_executor.cc b/third_party/xla/xla/stream_executor/cuda/cuda_executor.cc index edcd40c16aba7f..596a98fa028e14 100644 --- a/third_party/xla/xla/stream_executor/cuda/cuda_executor.cc +++ b/third_party/xla/xla/stream_executor/cuda/cuda_executor.cc @@ -794,13 +794,6 @@ Event::Status GpuExecutor::PollForEventStatus(Event* event) { return AsGpuEvent(event)->PollForStatus(); } -bool GpuExecutor::AllocateStream(Stream* stream) { - 
absl::MutexLock l(&alive_gpu_streams_mu_); - bool out = AsGpuStream(stream)->Init(); - alive_gpu_streams_[stream->platform_specific_handle().stream] = stream; - return out; -} - void GpuExecutor::DeallocateStream(Stream* stream) { { absl::MutexLock lock(&mu_); @@ -969,9 +962,16 @@ absl::StatusOr> GpuExecutor::CreateStream( gpu_stream->SetPriority(std::get(*priority)); } } - auto stream = std::make_unique(this, std::move(gpu_stream)); - TF_RETURN_IF_ERROR(stream->Initialize(priority)); - return std::move(stream); + absl::MutexLock l(&alive_gpu_streams_mu_); + bool init_worked = gpu_stream->Init(); + if (init_worked) { + auto platform_specific_stream = gpu_stream->platform_specific_stream(); + auto stream = std::make_unique(this, std::move(gpu_stream)); + alive_gpu_streams_[platform_specific_stream] = stream.get(); + return std::move(stream); + } else { + return absl::InvalidArgumentError("Failed to initialize gpu stream"); + } } absl::StatusOr> GpuExecutor::CreateKernel() { diff --git a/third_party/xla/xla/stream_executor/gpu/gpu_executor.h b/third_party/xla/xla/stream_executor/gpu/gpu_executor.h index ab4b97db67c8f3..4fda798c54078f 100644 --- a/third_party/xla/xla/stream_executor/gpu/gpu_executor.h +++ b/third_party/xla/xla/stream_executor/gpu/gpu_executor.h @@ -229,8 +229,6 @@ class GpuExecutor : public StreamExecutor { bool HostCallback(Stream* stream, absl::AnyInvocable callback) override; - bool AllocateStream(Stream* stream) override; - void DeallocateStream(Stream* stream) override; bool CreateStreamDependency(Stream* dependent, Stream* other) override; diff --git a/third_party/xla/xla/stream_executor/gpu/gpu_stream.h b/third_party/xla/xla/stream_executor/gpu/gpu_stream.h index ebd143953e54b5..a06839b5c1c79b 100644 --- a/third_party/xla/xla/stream_executor/gpu/gpu_stream.h +++ b/third_party/xla/xla/stream_executor/gpu/gpu_stream.h @@ -45,8 +45,7 @@ class GpuStream : public StreamInterface { void* platform_specific_stream() override { return gpu_stream_; } - // Explicitly initialize the CUDA resources associated with this stream, used - // by StreamExecutor::AllocateStream(). + // Explicitly initialize the CUDA resources associated with this stream. 
bool Init(); void SetPriority(StreamPriority priority) override { diff --git a/third_party/xla/xla/stream_executor/host/host_executor.cc b/third_party/xla/xla/stream_executor/host/host_executor.cc index 3d59b1c69ea587..8a9c812bb7ecdc 100644 --- a/third_party/xla/xla/stream_executor/host/host_executor.cc +++ b/third_party/xla/xla/stream_executor/host/host_executor.cc @@ -212,8 +212,6 @@ bool HostExecutor::HostCallback( return true; } -bool HostExecutor::AllocateStream(Stream* stream) { return true; } - void HostExecutor::DeallocateStream(Stream* stream) {} bool HostExecutor::CreateStreamDependency(Stream* dependent, Stream* other) { @@ -304,9 +302,7 @@ HostExecutor::CreateDeviceDescription(int device_ordinal) { absl::StatusOr> HostExecutor::CreateStream( std::optional> priority) { - auto stream = std::make_unique(this, std::make_unique()); - TF_RETURN_IF_ERROR(stream->Initialize(priority)); - return std::move(stream); + return std::make_unique(this, std::make_unique()); } } // namespace host diff --git a/third_party/xla/xla/stream_executor/host/host_executor.h b/third_party/xla/xla/stream_executor/host/host_executor.h index 40cb0ccea89330..c57a078e6f6b51 100644 --- a/third_party/xla/xla/stream_executor/host/host_executor.h +++ b/third_party/xla/xla/stream_executor/host/host_executor.h @@ -111,7 +111,6 @@ class HostExecutor : public StreamExecutor { absl::Status WaitForEvent(Stream* stream, Event* event) override; Event::Status PollForEventStatus(Event* event) override; - bool AllocateStream(Stream* stream) override; void DeallocateStream(Stream* stream) override; bool CreateStreamDependency(Stream* dependent, Stream* other) override; diff --git a/third_party/xla/xla/stream_executor/mock_stream_executor.h b/third_party/xla/xla/stream_executor/mock_stream_executor.h index 12a0c2794baf7b..89464874408753 100644 --- a/third_party/xla/xla/stream_executor/mock_stream_executor.h +++ b/third_party/xla/xla/stream_executor/mock_stream_executor.h @@ -142,7 +142,6 @@ class MockStreamExecutor : public StreamExecutorInterface { MOCK_METHOD(absl::Status, WaitForEventOnExternalStream, (std::intptr_t stream, Event* event), (override)); MOCK_METHOD(Event::Status, PollForEventStatus, (Event * event), (override)); - MOCK_METHOD(bool, AllocateStream, (Stream * stream), (override)); MOCK_METHOD(void, DeallocateStream, (Stream * stream), (override)); MOCK_METHOD(bool, CreateStreamDependency, (Stream * dependent, Stream* other), (override)); diff --git a/third_party/xla/xla/stream_executor/rocm/rocm_executor.cc b/third_party/xla/xla/stream_executor/rocm/rocm_executor.cc index 7f714d765354dc..226cf3afe231b7 100644 --- a/third_party/xla/xla/stream_executor/rocm/rocm_executor.cc +++ b/third_party/xla/xla/stream_executor/rocm/rocm_executor.cc @@ -698,13 +698,6 @@ Event::Status GpuExecutor::PollForEventStatus(Event* event) { return AsGpuEvent(event)->PollForStatus(); } -bool GpuExecutor::AllocateStream(Stream* stream) { - absl::MutexLock l(&alive_gpu_streams_mu_); - bool out = AsGpuStream(stream)->Init(); - alive_gpu_streams_[stream->platform_specific_handle().stream] = stream; - return out; -} - void GpuExecutor::DeallocateStream(Stream* stream) { { absl::MutexLock lock(&mu_); @@ -875,9 +868,16 @@ absl::StatusOr> GpuExecutor::CreateStream( gpu_stream->SetPriority(std::get(*priority)); } } - auto stream = std::make_unique(this, std::move(gpu_stream)); - TF_RETURN_IF_ERROR(stream->Initialize(priority)); - return std::move(stream); + absl::MutexLock l(&alive_gpu_streams_mu_); + bool init_worked = gpu_stream->Init(); + if 
(init_worked) { + auto platform_specific_stream = gpu_stream->platform_specific_stream(); + auto stream = std::make_unique(this, std::move(gpu_stream)); + alive_gpu_streams_[platform_specific_stream] = stream.get(); + return std::move(stream); + } else { + return absl::InvalidArgumentError("Failed to initialize GPU stream"); + } } absl::StatusOr> GpuExecutor::CreateKernel() { diff --git a/third_party/xla/xla/stream_executor/stream.cc b/third_party/xla/xla/stream_executor/stream.cc index 4fc078b0ae2c14..dbb0ca213cced9 100644 --- a/third_party/xla/xla/stream_executor/stream.cc +++ b/third_party/xla/xla/stream_executor/stream.cc @@ -47,17 +47,6 @@ Stream::Stream(StreamExecutor *parent, implementation_(std::move(implementation)), status_(absl::OkStatus()) {} -absl::Status Stream::Initialize( - std::optional> priority) { - absl::MutexLock lock(&mu_); - if (parent_->AllocateStream(this)) { - // Successful initialization! - return absl::OkStatus(); - } - - return absl::InternalError("failed to allocate stream during initialization"); -} - Stream::~Stream() { // Ensure the stream is completed. auto status = BlockHostUntilDone(); diff --git a/third_party/xla/xla/stream_executor/stream.h b/third_party/xla/xla/stream_executor/stream.h index cc0d5d94ae0524..f6bf0a49335d82 100644 --- a/third_party/xla/xla/stream_executor/stream.h +++ b/third_party/xla/xla/stream_executor/stream.h @@ -110,11 +110,6 @@ class Stream { // execution status. absl::Status RefreshStatus() TF_LOCKS_EXCLUDED(mu_); - // Initialize the stream. This must be performed before entraining any other - // operations. - absl::Status Initialize( - std::optional> priority = std::nullopt); - // Get or create a sub-stream from this stream. If there is any sub-stream in // the pool that can be reused then just return this sub-stream. Otherwise // create a new sub-stream. diff --git a/third_party/xla/xla/stream_executor/stream_executor_interface.h b/third_party/xla/xla/stream_executor/stream_executor_interface.h index 9097ea4f064c91..65eff81b0a4d25 100644 --- a/third_party/xla/xla/stream_executor/stream_executor_interface.h +++ b/third_party/xla/xla/stream_executor/stream_executor_interface.h @@ -282,10 +282,6 @@ class StreamExecutorInterface { // Requests the current status of the event from the underlying platform. virtual Event::Status PollForEventStatus(Event* event) = 0; - // Allocates stream resources on the underlying platform and initializes its - // internals. - virtual bool AllocateStream(Stream* stream) = 0; - // Deallocates stream resources on the underlying platform. 
virtual void DeallocateStream(Stream* stream) = 0; diff --git a/third_party/xla/xla/stream_executor/tpu/tpu_executor.cc b/third_party/xla/xla/stream_executor/tpu/tpu_executor.cc index c36d09f5c238f0..c59c0c45db1b82 100644 --- a/third_party/xla/xla/stream_executor/tpu/tpu_executor.cc +++ b/third_party/xla/xla/stream_executor/tpu/tpu_executor.cc @@ -81,11 +81,6 @@ tensorflow::tpu::TpuCoreLocationExternal TpuExecutor::GetCoreLocationExternal() ExecutorApiFn()->TpuExecutor_GetCoreLocationFn(executor_)); } -bool TpuExecutor::AllocateStream(Stream* stream) { - return ExecutorApiFn()->TpuExecutor_AllocateStreamFn( - executor_, get_stream(stream->implementation())); -} - void TpuExecutor::DeallocateStream(Stream* stream) { ExecutorApiFn()->TpuExecutor_DeallocateStreamFn( executor_, get_stream(stream->implementation())); @@ -142,7 +137,6 @@ absl::StatusOr> TpuExecutor::CreateStream( stream_map()[ptr.get()] = tpu_stream; tpu_platform().mutex().Unlock(); auto stream = std::make_unique(this, std::move(ptr)); - TF_RETURN_IF_ERROR(stream->Initialize(priority)); return std::move(stream); } diff --git a/third_party/xla/xla/stream_executor/tpu/tpu_executor.h b/third_party/xla/xla/stream_executor/tpu/tpu_executor.h index 2c38b2ed0b464c..74b17765ef5b92 100644 --- a/third_party/xla/xla/stream_executor/tpu/tpu_executor.h +++ b/third_party/xla/xla/stream_executor/tpu/tpu_executor.h @@ -70,8 +70,6 @@ class TpuExecutor : public tensorflow::tpu::TpuExecutorInterface { absl::Status AllocateEvent(Event* event) override; - bool AllocateStream(Stream* stream) override; - absl::Status BlockHostUntilDone(Stream* stream) override; StatusOr> CreateDeviceDescription() diff --git a/third_party/xla/xla/stream_executor/tpu/tpu_executor_init_fns.inc b/third_party/xla/xla/stream_executor/tpu/tpu_executor_init_fns.inc index 3221fe01c0076d..4b5f8b64890efd 100644 --- a/third_party/xla/xla/stream_executor/tpu/tpu_executor_init_fns.inc +++ b/third_party/xla/xla/stream_executor/tpu/tpu_executor_init_fns.inc @@ -22,7 +22,6 @@ absl::Status SetExecutorStructFn( TFTPU_SET_FN(executor_fn, TpuExecutor_Deallocate); TFTPU_SET_FN(executor_fn, TpuExecutor_GetAllocatorStats); TFTPU_SET_FN(executor_fn, TpuExecutor_DeviceMemoryUsage); - TFTPU_SET_FN(executor_fn, TpuExecutor_AllocateStream); TFTPU_SET_FN(executor_fn, TpuExecutor_DeallocateStream); TFTPU_SET_FN(executor_fn, TpuExecutor_CreateStreamDependency); TFTPU_SET_FN(executor_fn, TpuExecutor_GetStatus); From 5699e32d0b2f7bd93e4a891eeee05c3f52315eae Mon Sep 17 00:00:00 2001 From: Kuy Mainwaring Date: Tue, 14 May 2024 15:19:41 -0700 Subject: [PATCH 140/478] [XLA:GPU] Clang-tidy fixes for gpu_norm_runner.h, in_place_dynamic_update_slice_mlir.h, tiling_util.h and indexing_analysis.h PiperOrigin-RevId: 633722707 --- third_party/xla/xla/service/gpu/fusions/BUILD | 2 ++ .../in_place_dynamic_update_slice_mlir.h | 3 +-- .../xla/xla/service/gpu/fusions/tiling_util.h | 7 +++++++ .../xla/xla/service/gpu/gpu_norm_runner.h | 2 +- .../xla/service/gpu/model/indexing_analysis.h | 18 +++++++++--------- 5 files changed, 20 insertions(+), 12 deletions(-) diff --git a/third_party/xla/xla/service/gpu/fusions/BUILD b/third_party/xla/xla/service/gpu/fusions/BUILD index 1b1d9c8a5ef0e5..17b48188bed3ea 100644 --- a/third_party/xla/xla/service/gpu/fusions/BUILD +++ b/third_party/xla/xla/service/gpu/fusions/BUILD @@ -585,8 +585,10 @@ cc_library( "//xla/service/llvm_ir:kernel_support_library", "//xla/service/llvm_ir:llvm_loop", "//xla/service/llvm_ir:llvm_util", + "@com_google_absl//absl/container:inlined_vector", 
"@com_google_absl//absl/log:check", "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/types:span", "@llvm-project//llvm:Support", "@llvm-project//llvm:ir_headers", diff --git a/third_party/xla/xla/service/gpu/fusions/in_place_dynamic_update_slice_mlir.h b/third_party/xla/xla/service/gpu/fusions/in_place_dynamic_update_slice_mlir.h index 9d1514c131524d..efcfce8b2c6ede 100644 --- a/third_party/xla/xla/service/gpu/fusions/in_place_dynamic_update_slice_mlir.h +++ b/third_party/xla/xla/service/gpu/fusions/in_place_dynamic_update_slice_mlir.h @@ -21,7 +21,6 @@ limitations under the License. #include "mlir/Dialect/Func/IR/FuncOps.h" // from @llvm-project #include "mlir/IR/MLIRContext.h" // from @llvm-project -#include "mlir/Interfaces/DataLayoutInterfaces.h" // from @llvm-project #include "xla/hlo/ir/hlo_instruction.h" #include "xla/hlo/ir/hlo_instructions.h" #include "xla/service/gpu/fusions/mlir/computation_partitioner.h" @@ -62,7 +61,7 @@ class MlirInPlaceDynamicUpdateSliceFusion : public MlirFusionEmitterBase { std::optional ComputeThreadIdToInputIndexing( int64_t root_index, int64_t hero_operand_index, - mlir::MLIRContext* indexing_context) const override; + mlir::MLIRContext* mlir_context) const override; protected: absl::Status EmitEntryFunction( diff --git a/third_party/xla/xla/service/gpu/fusions/tiling_util.h b/third_party/xla/xla/service/gpu/fusions/tiling_util.h index f06ae8ccab4280..977dbdaf986f72 100644 --- a/third_party/xla/xla/service/gpu/fusions/tiling_util.h +++ b/third_party/xla/xla/service/gpu/fusions/tiling_util.h @@ -19,9 +19,16 @@ limitations under the License. #include #include +#include "absl/container/inlined_vector.h" #include "absl/log/check.h" +#include "absl/strings/str_format.h" +#include "absl/strings/str_join.h" #include "absl/types/span.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" #include "xla/service/llvm_ir/ir_array.h" +#include "xla/shape.h" #include "xla/shape_util.h" #include "xla/util.h" diff --git a/third_party/xla/xla/service/gpu/gpu_norm_runner.h b/third_party/xla/xla/service/gpu/gpu_norm_runner.h index 8461671e86d037..7c9da554a16fa3 100644 --- a/third_party/xla/xla/service/gpu/gpu_norm_runner.h +++ b/third_party/xla/xla/service/gpu/gpu_norm_runner.h @@ -184,7 +184,7 @@ absl::Status RunGpuNorm(const GpuNormConfig& conv_config, const se::DeviceMemoryBase& y_or_dx_buffer, std::optional bias_buffer, std::optional dy_buffer, - std::optional exepctation_buffer, + std::optional expectation_buffer, std::optional norm_factor_buffer, std::optional dscale_buffer, std::optional dbias_buffer, diff --git a/third_party/xla/xla/service/gpu/model/indexing_analysis.h b/third_party/xla/xla/service/gpu/model/indexing_analysis.h index 5408d1e5dd9708..76fc8ab39e476c 100644 --- a/third_party/xla/xla/service/gpu/model/indexing_analysis.h +++ b/third_party/xla/xla/service/gpu/model/indexing_analysis.h @@ -104,7 +104,7 @@ HloInstructionIndexing ComputeInputToOutputIndexing(const HloInstruction* instr, // fusion does not make much sense, but they are created sometimes. IndexingMap ComputeEpilogueInputToOutputIndexing( HloInstructionAdaptor epilogue_parent, HloInstructionAdaptor epilogue_root, - mlir::MLIRContext* ctx); + mlir::MLIRContext* mlir_context); using GroupedByOpIndexingMap = absl::flat_hash_map; @@ -132,32 +132,32 @@ bool FuseProducerConsumerOutputToInputIndexing( // Equivalent to linearizing the input_shape index and then delinearizing it // to output_shape. 
IndexingMap GetBitcastMap(const Shape& input_shape, const Shape& output_shape, - mlir::MLIRContext* ctx); + mlir::MLIRContext* mlir_context); // Creates an indexing map from the physical layout of the tensor to its logical // layout. -IndexingMap GetIndexingMapFromPhysicalLayoutToLogical(const Shape& shape, - mlir::MLIRContext* ctx); +IndexingMap GetIndexingMapFromPhysicalLayoutToLogical( + const Shape& shape, mlir::MLIRContext* mlir_context); // Creates an indexing map from the logical layout of the tensor to its physical // layout. -IndexingMap GetIndexingMapFromLogicalToPhysicalLayout(const Shape& shape, - mlir::MLIRContext* ctx); +IndexingMap GetIndexingMapFromLogicalToPhysicalLayout( + const Shape& shape, mlir::MLIRContext* mlir_context); // Creates an indexing map from thread and block IDs to elements of the tiled // shape. Uses the same convention as KernelFusionInterface: dimensions 0 to 2 // are thread indices (currently only 0 is used), dimensions 3 to 5 are block // indices (currently only 3 is used). mlir::AffineMap GetBlockOffsetsForTiling(const Tiling& tiling, - mlir::MLIRContext* ctx); + mlir::MLIRContext* mlir_context); mlir::AffineMap GetThreadOffsetsForTiling(const Tiling& tiling, - mlir::MLIRContext* ctx); + mlir::MLIRContext* mlir_context); // Convenience functions for the two functions above // (`GetBlockOffsestsForTiling` + `GetThreadOffsetsForTiling`). Also sets up // the ranges of dimensions and symbols. IndexingMap GetIndexingMapForTiling(const Tiling& tiling, - mlir::MLIRContext* ctx); + mlir::MLIRContext* mlir_context); IndexingMap GetIndexingMapForTiling(mlir::AffineMap block_offsets, mlir::AffineMap thread_offsets, int64_t threads_per_block, From 8d5affaa0e8d971becc90b253ff5b4e6bf69908f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 May 2024 15:24:32 -0700 Subject: [PATCH 141/478] Update ops-related pbtxt files. PiperOrigin-RevId: 633723992 --- tensorflow/core/ops/ops.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 4ac4c1beb21194..cdc0191b8ad6c9 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -1,4 +1,4 @@ -go/nodeserialize +go/debugproto op { name: "Abort" attr { From d17f7cea03029b5e8b7848ea1345edcff09d8f2b Mon Sep 17 00:00:00 2001 From: Kyle Lucke Date: Tue, 14 May 2024 15:31:17 -0700 Subject: [PATCH 142/478] Eliminate deprecated Create methods, and use the KernelFactory methods everywhere. 
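For reviewers, the mechanical pattern of the migration (the kernel name,
parameter pack, and symbol below are placeholders, not code from this
change):

    // Before: deprecated static factory on the kernel template itself.
    auto kernel = se::TypedKernel<se::DeviceMemoryBase>::Create(
        executor, "my_kernel", my_symbol);

    // After: identical arguments, moved to the dedicated factory class.
    auto kernel = se::TypedKernelFactory<se::DeviceMemoryBase>::Create(
        executor, "my_kernel", my_symbol);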
PiperOrigin-RevId: 633726272 --- third_party/xla/xla/service/gpu/BUILD | 3 + .../xla/xla/service/gpu/buffer_comparator.cc | 10 +-- third_party/xla/xla/service/gpu/kernels/BUILD | 1 + .../xla/service/gpu/kernels/topk_kernel.cc | 8 +- .../xla/service/gpu/make_batch_pointers.cc | 3 +- .../xla/service/gpu/stream_executor_util.cc | 6 +- third_party/xla/xla/stream_executor/BUILD | 1 + third_party/xla/xla/stream_executor/gpu/BUILD | 3 + .../stream_executor/gpu/gpu_command_buffer.cc | 29 ++++--- .../gpu/gpu_timer_kernel_cuda.cu.cc | 7 +- .../gpu/redzone_allocator_kernel_cuda.cc | 7 +- .../xla/xla/stream_executor/host/BUILD | 1 + .../stream_executor/host/host_kernel_test.cc | 3 +- third_party/xla/xla/stream_executor/kernel.cc | 7 -- third_party/xla/xla/stream_executor/kernel.h | 78 ------------------- .../xla/xla/stream_executor/kernel_test.cc | 3 +- .../stream_executor/typed_kernel_factory.h | 6 +- 17 files changed, 59 insertions(+), 117 deletions(-) diff --git a/third_party/xla/xla/service/gpu/BUILD b/third_party/xla/xla/service/gpu/BUILD index 3d6c90b8119d96..c19905c49698c8 100644 --- a/third_party/xla/xla/service/gpu/BUILD +++ b/third_party/xla/xla/service/gpu/BUILD @@ -4367,6 +4367,7 @@ cc_library( "//xla/stream_executor", "//xla/stream_executor:kernel_factory", "//xla/stream_executor:launch_dim", + "//xla/stream_executor:typed_kernel_factory", "//xla/tsl/util:env_var", "//xla/tsl/util/proto:proto_utils", "@com_google_absl//absl/algorithm:container", @@ -4476,6 +4477,7 @@ cc_library( "//xla/service:hlo_module_config", "//xla/stream_executor", "//xla/stream_executor:device_memory_handle", + "//xla/stream_executor:typed_kernel_factory", "//xla/stream_executor/gpu:asm_compiler", "@local_tsl//tsl/platform:errors", "@local_tsl//tsl/platform:logging", @@ -5329,6 +5331,7 @@ cc_library( "//xla:util", "//xla/stream_executor", "//xla/stream_executor:device_memory", + "//xla/stream_executor:typed_kernel_factory", "//xla/stream_executor/gpu:gpu_stream_header", "@com_google_absl//absl/status", "@local_tsl//tsl/platform:errors", diff --git a/third_party/xla/xla/service/gpu/buffer_comparator.cc b/third_party/xla/xla/service/gpu/buffer_comparator.cc index ab9141b3e042f5..f4f5112add357b 100644 --- a/third_party/xla/xla/service/gpu/buffer_comparator.cc +++ b/third_party/xla/xla/service/gpu/buffer_comparator.cc @@ -31,6 +31,7 @@ limitations under the License. 
#include "xla/stream_executor/device_memory_handle.h" #include "xla/stream_executor/kernel.h" #include "xla/stream_executor/stream_executor.h" +#include "xla/stream_executor/typed_kernel_factory.h" #include "xla/util.h" #include "tsl/platform/errors.h" #include "tsl/platform/logging.h" @@ -75,11 +76,10 @@ static absl::StatusOr DeviceCompare(se::Stream* stream, TF_ASSIGN_OR_RETURN( ComparisonKernelT comparison_kernel, - (se::TypedKernel, se::DeviceMemory, - float, uint64_t, - se::DeviceMemory>::Create(executor, - kernel_name, - kernel_symbol))); + (se::TypedKernelFactory< + se::DeviceMemory, se::DeviceMemory, float, + uint64_t, se::DeviceMemory>::Create(executor, kernel_name, + kernel_symbol))); const se::DeviceDescription& gpu_device_info = executor->GetDeviceDescription(); diff --git a/third_party/xla/xla/service/gpu/kernels/BUILD b/third_party/xla/xla/service/gpu/kernels/BUILD index fc9c7ec54b2aff..a7add705572576 100644 --- a/third_party/xla/xla/service/gpu/kernels/BUILD +++ b/third_party/xla/xla/service/gpu/kernels/BUILD @@ -143,6 +143,7 @@ cc_library( "//xla:xla_data_proto_cc", "//xla/stream_executor", # build_cleaner: keep "//xla/stream_executor:platform", + "//xla/stream_executor:typed_kernel_factory", "//xla/stream_executor/gpu:gpu_stream_header", "//xla/stream_executor/gpu:gpu_types_header", "@com_google_absl//absl/numeric:bits", diff --git a/third_party/xla/xla/service/gpu/kernels/topk_kernel.cc b/third_party/xla/xla/service/gpu/kernels/topk_kernel.cc index 3ce37ef5978b13..1595d823b41fd8 100644 --- a/third_party/xla/xla/service/gpu/kernels/topk_kernel.cc +++ b/third_party/xla/xla/service/gpu/kernels/topk_kernel.cc @@ -32,6 +32,7 @@ limitations under the License. #include "xla/stream_executor/kernel.h" #include "xla/stream_executor/launch_dim.h" #include "xla/stream_executor/stream.h" +#include "xla/stream_executor/typed_kernel_factory.h" #include "xla/types.h" #include "xla/xla_data.pb.h" #include "tsl/platform/errors.h" @@ -84,9 +85,10 @@ absl::Status TypedTopK(se::Stream* stream, se::DeviceMemoryBase data, TF_ASSIGN_OR_RETURN(void* kernel_symbol, GetKernel(num_elements, k)); TF_ASSIGN_OR_RETURN( auto kernel, - (se::TypedKernel, size_t, se::DeviceMemory, - se::DeviceMemory, - size_t>::Create(executor, "topk", kernel_symbol))); + (se::TypedKernelFactory, size_t, se::DeviceMemory, + se::DeviceMemory, + size_t>::Create(executor, "topk", + kernel_symbol))); TF_RETURN_IF_ERROR(stream->ThenLaunch( se::ThreadDim(num_threads, 1, 1), se::BlockDim(batch_size, 1, 1), diff --git a/third_party/xla/xla/service/gpu/make_batch_pointers.cc b/third_party/xla/xla/service/gpu/make_batch_pointers.cc index e7788d652dcc40..f0de161a35e3ce 100644 --- a/third_party/xla/xla/service/gpu/make_batch_pointers.cc +++ b/third_party/xla/xla/service/gpu/make_batch_pointers.cc @@ -23,6 +23,7 @@ limitations under the License. 
#include "xla/stream_executor/kernel.h" #include "xla/stream_executor/launch_dim.h" #include "xla/stream_executor/stream.h" +#include "xla/stream_executor/typed_kernel_factory.h" #include "xla/util.h" #include "tsl/platform/errors.h" #include "tsl/platform/statusor.h" @@ -60,7 +61,7 @@ absl::Status MakeBatchPointers(se::Stream* stream, TF_ASSIGN_OR_RETURN( auto kernel, - (se::TypedKernel< + (se::TypedKernelFactory< se::DeviceMemoryBase, size_t, size_t, se::DeviceMemoryBase>::Create(executor, "make_batch_pointers", make_batch_pointers::kernel()))); diff --git a/third_party/xla/xla/service/gpu/stream_executor_util.cc b/third_party/xla/xla/service/gpu/stream_executor_util.cc index 8760faa725bcef..93c0a63bb14566 100644 --- a/third_party/xla/xla/service/gpu/stream_executor_util.cc +++ b/third_party/xla/xla/service/gpu/stream_executor_util.cc @@ -57,6 +57,7 @@ limitations under the License. #include "xla/stream_executor/launch_dim.h" #include "xla/stream_executor/platform.h" #include "xla/stream_executor/stream.h" +#include "xla/stream_executor/typed_kernel_factory.h" #include "xla/tsl/util/env_var.h" #include "xla/tsl/util/proto/proto_utils.h" #include "xla/util.h" @@ -489,8 +490,9 @@ static void InitializeTypedBuffer(se::Stream* stream, // Repeat the host_buffer_size elements at the start of `buf` to the end CHECK_EQ(elements_to_fill, buffer.size() / sizeof(T) - host_buffer_size); se::StreamExecutor* executor = stream->parent(); - auto kernel = se::TypedKernel::Create( - executor, "RepeatBufferKernel", repeat_buffer_kernel::kernel()); + auto kernel = + se::TypedKernelFactory::Create( + executor, "RepeatBufferKernel", repeat_buffer_kernel::kernel()); if (!kernel.ok()) { LOG(FATAL) << "Could not create RepeatBufferKernel: " << kernel.status(); } diff --git a/third_party/xla/xla/stream_executor/BUILD b/third_party/xla/xla/stream_executor/BUILD index 3899b903553264..d41de1e0ff7032 100644 --- a/third_party/xla/xla/stream_executor/BUILD +++ b/third_party/xla/xla/stream_executor/BUILD @@ -752,6 +752,7 @@ xla_cc_test( deps = [ ":device_memory", ":stream_executor", + ":typed_kernel_factory", "//xla/stream_executor/host:host_platform", "@local_tsl//tsl/platform:test", "@local_tsl//tsl/platform:test_benchmark", diff --git a/third_party/xla/xla/stream_executor/gpu/BUILD b/third_party/xla/xla/stream_executor/gpu/BUILD index 8e70a01a43a6ed..619a5bb034ab76 100644 --- a/third_party/xla/xla/stream_executor/gpu/BUILD +++ b/third_party/xla/xla/stream_executor/gpu/BUILD @@ -152,6 +152,7 @@ gpu_only_cc_library( ":gpu_types_header", "//xla/stream_executor", "//xla/stream_executor:stream_executor_interface", + "//xla/stream_executor:typed_kernel_factory", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:inlined_vector", "@com_google_absl//absl/functional:any_invocable", @@ -343,6 +344,7 @@ gpu_kernel_library( ":gpu_semaphore", ":gpu_stream", "//xla/stream_executor", + "//xla/stream_executor:typed_kernel_factory", "@com_google_absl//absl/status:statusor", ], ) @@ -484,6 +486,7 @@ gpu_kernel_library( "//xla/stream_executor", "//xla/stream_executor:device_memory", "//xla/stream_executor:stream_executor_headers", + "//xla/stream_executor:typed_kernel_factory", "//xla/stream_executor/cuda:cuda_asm_compiler", "//xla/stream_executor/cuda:cuda_driver", "@com_google_absl//absl/base", diff --git a/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer.cc b/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer.cc index c0788cf21a9e58..bcfc6a3790507f 100644 --- 
a/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer.cc +++ b/third_party/xla/xla/stream_executor/gpu/gpu_command_buffer.cc @@ -46,6 +46,7 @@ limitations under the License. #include "xla/stream_executor/kernel_spec.h" #include "xla/stream_executor/launch_dim.h" #include "xla/stream_executor/stream_executor.h" +#include "xla/stream_executor/typed_kernel_factory.h" #include "tsl/platform/env.h" #include "tsl/platform/errors.h" #include "tsl/platform/logging.h" @@ -217,8 +218,9 @@ GpuCommandBuffer::GetSetIfConditionKernel() { if (!set_if_condition_kernel_) { MultiKernelLoaderSpec spec(/*arity=*/2); spec.AddCudaPtxInMemory(gpu::GetSetIfConditionKernel(), "set_if_condition"); - TF_ASSIGN_OR_RETURN(set_if_condition_kernel_, - SetIfConditionKernel::Create(parent_, spec)); + TF_ASSIGN_OR_RETURN( + set_if_condition_kernel_, + SetIfConditionKernel::FactoryType::Create(parent_, spec)); } return &set_if_condition_kernel_; } @@ -229,8 +231,9 @@ GpuCommandBuffer::GetSetIfElseConditionKernel() { MultiKernelLoaderSpec spec(/*arity=*/3); spec.AddCudaPtxInMemory(gpu::GetSetIfElseConditionKernel(), "set_if_else_condition"); - TF_ASSIGN_OR_RETURN(set_if_else_condition_kernel_, - SetIfElseConditionKernel::Create(parent_, spec)); + TF_ASSIGN_OR_RETURN( + set_if_else_condition_kernel_, + SetIfElseConditionKernel::FactoryType::Create(parent_, spec)); } return &set_if_else_condition_kernel_; } @@ -241,8 +244,9 @@ GpuCommandBuffer::GetSetCaseConditionKernel() { MultiKernelLoaderSpec spec(/*arity=*/10); spec.AddCudaPtxInMemory(gpu::GetSetCaseConditionKernel(), "set_case_condition"); - TF_ASSIGN_OR_RETURN(set_case_condition_kernel_, - SetCaseConditionKernel::Create(parent_, spec)); + TF_ASSIGN_OR_RETURN( + set_case_condition_kernel_, + SetCaseConditionKernel::FactoryType::Create(parent_, spec)); } return &set_case_condition_kernel_; } @@ -253,8 +257,9 @@ GpuCommandBuffer::GetSetForConditionKernel() { MultiKernelLoaderSpec spec(/*arity=*/3); spec.AddCudaPtxInMemory(gpu::GetSetForConditionKernel(), "set_for_condition"); - TF_ASSIGN_OR_RETURN(set_for_condition_kernel_, - SetForConditionKernel::Create(parent_, spec)); + TF_ASSIGN_OR_RETURN( + set_for_condition_kernel_, + SetForConditionKernel::FactoryType::Create(parent_, spec)); } return &set_for_condition_kernel_; } @@ -265,8 +270,9 @@ GpuCommandBuffer::GetSetWhileConditionKernel() { MultiKernelLoaderSpec spec(/*arity=*/2); spec.AddCudaPtxInMemory(gpu::GetSetWhileConditionKernel(), "set_while_condition"); - TF_ASSIGN_OR_RETURN(set_while_condition_kernel_, - SetWhileConditionKernel::Create(parent_, spec)); + TF_ASSIGN_OR_RETURN( + set_while_condition_kernel_, + SetWhileConditionKernel::FactoryType::Create(parent_, spec)); } return &set_while_condition_kernel_; } @@ -277,7 +283,8 @@ GpuCommandBuffer::GetNoOpKernel() { if (!noop_kernel_) { MultiKernelLoaderSpec spec(/*arity=*/0); spec.AddCudaPtxInMemory(gpu::kNoOpKernel, "noop"); - TF_ASSIGN_OR_RETURN(noop_kernel_, NoOpKernel::Create(parent_, spec)); + TF_ASSIGN_OR_RETURN(noop_kernel_, + NoOpKernel::FactoryType::Create(parent_, spec)); } return &noop_kernel_; #else diff --git a/third_party/xla/xla/stream_executor/gpu/gpu_timer_kernel_cuda.cu.cc b/third_party/xla/xla/stream_executor/gpu/gpu_timer_kernel_cuda.cu.cc index b4af6019234d04..8a320c583547c6 100644 --- a/third_party/xla/xla/stream_executor/gpu/gpu_timer_kernel_cuda.cu.cc +++ b/third_party/xla/xla/stream_executor/gpu/gpu_timer_kernel_cuda.cu.cc @@ -19,6 +19,7 @@ limitations under the License. 
#include "xla/stream_executor/gpu/gpu_executor.h" #include "xla/stream_executor/gpu/gpu_semaphore.h" #include "xla/stream_executor/gpu/gpu_timer_kernel.h" +#include "xla/stream_executor/typed_kernel_factory.h" namespace stream_executor::gpu { namespace { @@ -60,8 +61,10 @@ absl::StatusOr LaunchDelayKernel(Stream* stream) { // multiple GpuTimer objects. TF_ASSIGN_OR_RETURN( auto kernel, - (TypedKernel, GpuSemaphoreState>::Create( - executor, "DelayKernel", reinterpret_cast(DelayKernel)))); + (TypedKernelFactory, + GpuSemaphoreState>::Create(executor, "DelayKernel", + reinterpret_cast( + DelayKernel)))); // Launch a delay kernel into this stream, which will spin until // GetElapsedDuration() is called, the timer is destroyed, or the timeout // in the kernel is reached. diff --git a/third_party/xla/xla/stream_executor/gpu/redzone_allocator_kernel_cuda.cc b/third_party/xla/xla/stream_executor/gpu/redzone_allocator_kernel_cuda.cc index 2432c1e5da79d2..6e6e5ad1077590 100644 --- a/third_party/xla/xla/stream_executor/gpu/redzone_allocator_kernel_cuda.cc +++ b/third_party/xla/xla/stream_executor/gpu/redzone_allocator_kernel_cuda.cc @@ -34,6 +34,7 @@ limitations under the License. #include "xla/stream_executor/gpu/redzone_allocator_kernel.h" #include "xla/stream_executor/kernel.h" #include "xla/stream_executor/stream_executor_pimpl.h" +#include "xla/stream_executor/typed_kernel_factory.h" #include "tsl/platform/statusor.h" namespace stream_executor { @@ -54,9 +55,9 @@ static absl::StatusOr*> LoadKernelOrGetPtr( auto it = kernel_ptr_cache.find(kernel_ptr_cache_key); if (it == kernel_ptr_cache.end()) { - TF_ASSIGN_OR_RETURN( - TypedKernel loaded, - (TypedKernel::Create(executor, kernel_name, ptx, cubin_data))); + TF_ASSIGN_OR_RETURN(TypedKernel loaded, + (TypedKernelFactory::Create( + executor, kernel_name, ptx, cubin_data))); it = kernel_ptr_cache.emplace(kernel_ptr_cache_key, std::move(loaded)).first; } diff --git a/third_party/xla/xla/stream_executor/host/BUILD b/third_party/xla/xla/stream_executor/host/BUILD index da6b0d35ba7d5b..6ad6b619df78c7 100644 --- a/third_party/xla/xla/stream_executor/host/BUILD +++ b/third_party/xla/xla/stream_executor/host/BUILD @@ -158,6 +158,7 @@ xla_cc_test( ":host_platform", "//xla/stream_executor", "//xla/stream_executor:device_memory", + "//xla/stream_executor:kernel_factory", "@com_google_absl//absl/types:span", "@local_tsl//tsl/lib/core:status_test_util", "@local_tsl//tsl/platform:statusor", diff --git a/third_party/xla/xla/stream_executor/host/host_kernel_test.cc b/third_party/xla/xla/stream_executor/host/host_kernel_test.cc index 2dcb432cf81bf4..cebe2b3220c7fd 100644 --- a/third_party/xla/xla/stream_executor/host/host_kernel_test.cc +++ b/third_party/xla/xla/stream_executor/host/host_kernel_test.cc @@ -23,6 +23,7 @@ limitations under the License. 
#include "absl/types/span.h" #include "xla/stream_executor/device_memory.h" #include "xla/stream_executor/host/host_kernel_c_api.h" +#include "xla/stream_executor/kernel_factory.h" #include "xla/stream_executor/kernel_spec.h" #include "xla/stream_executor/launch_dim.h" #include "xla/stream_executor/platform.h" @@ -116,7 +117,7 @@ TEST(HostKernelTest, LlvmAddition) { auto executor = NewStreamExecutor(); auto eg = executor.get(); EXPECT_NE(eg, nullptr); - TF_ASSERT_OK_AND_ASSIGN(auto add, HostKernel::Create(eg, spec)); + TF_ASSERT_OK_AND_ASSIGN(auto add, KernelFactory::Create(eg, spec)); // TODO(tsilytskyi): implement Launch part // TF_ASSERT_OK(executor->Launch(ThreadDim(4), args)); diff --git a/third_party/xla/xla/stream_executor/kernel.cc b/third_party/xla/xla/stream_executor/kernel.cc index ba8f0bbda43b22..81767a7fe5025b 100644 --- a/third_party/xla/xla/stream_executor/kernel.cc +++ b/third_party/xla/xla/stream_executor/kernel.cc @@ -52,13 +52,6 @@ void KernelMetadata::set_shared_memory_bytes(int shared_memory_bytes) { // Kernel //===----------------------------------------------------------------------===// -absl::StatusOr> Kernel::Create( - StreamExecutorInterface *executor, const MultiKernelLoaderSpec &spec) { - TF_ASSIGN_OR_RETURN(auto kernel, executor->CreateKernel()); - TF_RETURN_IF_ERROR(executor->GetKernel(spec, kernel.get())); - return kernel; -} - void Kernel::set_name(absl::string_view name) { name_ = std::string(name); diff --git a/third_party/xla/xla/stream_executor/kernel.h b/third_party/xla/xla/stream_executor/kernel.h index 677e8db1a37b35..98de009e1b8a16 100644 --- a/third_party/xla/xla/stream_executor/kernel.h +++ b/third_party/xla/xla/stream_executor/kernel.h @@ -229,14 +229,6 @@ class Kernel { std::function>( const Kernel &kernel, const KernelArgs &args)>; - // TODO(b/323534971): Kernel constructor should be moved to StreamExecutor or - // a dedicated KernelFactory accessible via StreamExecutor. - - // Creates kernel on a given executor from a given kernel specification. - ABSL_DEPRECATED("Use KernelFactory::Create instead.") - static absl::StatusOr> Create( - StreamExecutorInterface *executor, const MultiKernelLoaderSpec &spec); - Kernel() = default; virtual ~Kernel() = default; @@ -293,41 +285,6 @@ class TypedKernel { public: static constexpr size_t kNumberOfParameters = sizeof...(Params); - // Creates a typed kernel on a given executor from a kernel specification. - ABSL_DEPRECATED("Use TypedKernelFactory::Create instead.") - static absl::StatusOr Create(StreamExecutorInterface *executor, - const MultiKernelLoaderSpec &spec) { - TF_ASSIGN_OR_RETURN(std::unique_ptr kernel, - Kernel::Create(executor, spec)); - return TypedKernel(std::move(kernel)); - } - - // Creates a kernel which can be launched with `stream.ThenLaunch(...)` from a - // PTX (and optional CUBIN), such that the types of the arguments provided for - // launch would have to match types of the arguments provided at creation - // time. The canonical storage for both ptx and cubin_data should outlive the - // lifetime of the kernel. - ABSL_DEPRECATED("Use TypedKernelFactory::Create instead.") - static absl::StatusOr Create( - StreamExecutorInterface *executor, absl::string_view kernel_name, - absl::string_view ptx, absl::Span cubin_data); - - // Creates a kernel which can be launched with `stream.ThenLaunch(...)` from - // an in-process symbol pointer. 
- ABSL_DEPRECATED("Use TypedKernelFactory::Create instead.") - static absl::StatusOr Create(StreamExecutorInterface *executor, - absl::string_view kernel_name, - void *symbol); - - // Creates a kernel which can be launched with `stream.ThenLaunch(...)` from - // an LLVM IR. - ABSL_DEPRECATED("Use TypedKernelFactory::Create instead.") - static absl::StatusOr Create(StreamExecutorInterface *executor, - absl::string_view ir, - absl::string_view entrypoint, - absl::string_view kernel_name, - absl::Span options); - TypedKernel() = default; Kernel &operator*() { return *kernel_; } @@ -754,41 +711,6 @@ std::unique_ptr PackKernelArgs( return std::make_unique(std::forward(args)..., shmem_bytes); } -template -inline absl::StatusOr> TypedKernel::Create( - StreamExecutorInterface *executor, absl::string_view kernel_name, - absl::string_view ptx, absl::Span cubin_data) { - MultiKernelLoaderSpec loader_spec(TypedKernel::kNumberOfParameters); - loader_spec.AddCudaPtxInMemory(ptx, kernel_name); - - if (!cubin_data.empty()) { - loader_spec.AddCudaCubinInMemory(cubin_data, kernel_name); - } - - return TypedKernel::Create(executor, loader_spec); -} - -template -inline absl::StatusOr> TypedKernel::Create( - StreamExecutorInterface *executor, absl::string_view kernel_name, - void *symbol) { - MultiKernelLoaderSpec loader_spec(TypedKernel::kNumberOfParameters); - loader_spec.AddInProcessSymbol(symbol, kernel_name); - - return TypedKernel::Create(executor, loader_spec); -} - -template -inline absl::StatusOr> TypedKernel::Create( - StreamExecutorInterface *executor, absl::string_view ir, - absl::string_view entrypoint, absl::string_view kernel_name, - absl::Span options) { - MultiKernelLoaderSpec loader_spec(TypedKernel::kNumberOfParameters); - loader_spec.AddLlvmHostKernel(ir, entrypoint, kernel_name, options); - - return TypedKernel::Create(executor, loader_spec); -} - } // namespace stream_executor #endif // XLA_STREAM_EXECUTOR_KERNEL_H_ diff --git a/third_party/xla/xla/stream_executor/kernel_test.cc b/third_party/xla/xla/stream_executor/kernel_test.cc index 205c559170e729..cf63e5b0a55281 100644 --- a/third_party/xla/xla/stream_executor/kernel_test.cc +++ b/third_party/xla/xla/stream_executor/kernel_test.cc @@ -26,6 +26,7 @@ limitations under the License. 
#include "xla/stream_executor/platform.h" #include "xla/stream_executor/platform_manager.h" #include "xla/stream_executor/stream_executor.h" +#include "xla/stream_executor/typed_kernel_factory.h" #include "tsl/platform/test.h" #include "tsl/platform/test_benchmark.h" @@ -124,7 +125,7 @@ TEST(KernelTest, FailToCreateTypedKernelFromEmptySpec) { MultiKernelLoaderSpec empty_spec(/*arity=*/0); auto executor = NewStreamExecutor(); - auto kernel = TypedKernel<>::Create(executor.get(), empty_spec); + auto kernel = TypedKernelFactory<>::Create(executor.get(), empty_spec); EXPECT_FALSE(kernel.ok()); } diff --git a/third_party/xla/xla/stream_executor/typed_kernel_factory.h b/third_party/xla/xla/stream_executor/typed_kernel_factory.h index ac908b13b08e0c..95b9eb6c505ca5 100644 --- a/third_party/xla/xla/stream_executor/typed_kernel_factory.h +++ b/third_party/xla/xla/stream_executor/typed_kernel_factory.h @@ -61,7 +61,7 @@ class TypedKernelFactory { loader_spec.AddCudaCubinInMemory(cubin_data, kernel_name); } - return TypedKernel::Create(executor, loader_spec); + return Create(executor, loader_spec); } // Creates a kernel which can be launched with `stream.ThenLaunch(...)` from @@ -73,7 +73,7 @@ class TypedKernelFactory { TypedKernel::kNumberOfParameters); loader_spec.AddInProcessSymbol(symbol, kernel_name); - return TypedKernel::Create(executor, loader_spec); + return Create(executor, loader_spec); } // Creates a kernel which can be launched with `stream.ThenLaunch(...)` from @@ -86,7 +86,7 @@ class TypedKernelFactory { TypedKernel::kNumberOfParameters); loader_spec.AddLlvmHostKernel(ir, entrypoint, kernel_name, options); - return TypedKernel::Create(executor, loader_spec); + return Create(executor, loader_spec); } }; From 2fca8835decdae0e261fbc98c2600ceed36a4d13 Mon Sep 17 00:00:00 2001 From: Deqiang Chen Date: Tue, 14 May 2024 16:03:02 -0700 Subject: [PATCH 143/478] tf_host_callback in tfrt/ifrt use DeviceMgr instead of StaticDeviceMgr for better generality and not owning the DeviceMgr since that can be owned/created in fallback_request PiperOrigin-RevId: 633734973 --- tensorflow/core/tfrt/fallback/fallback_state.h | 1 + tensorflow/core/tfrt/ifrt/ifrt_model_context.h | 10 ++++------ tensorflow/core/tfrt/ifrt/ifrt_serving_executable.cc | 6 +++--- tensorflow/core/tfrt/ifrt/ifrt_serving_executable.h | 6 +++--- tensorflow/core/tfrt/ifrt/tf_host_callback.cc | 2 +- tensorflow/core/tfrt/ifrt/tf_host_callback.h | 2 +- tensorflow/core/tfrt/runtime/runtime.h | 8 ++++++++ tensorflow/core/tfrt/saved_model/BUILD | 1 + tensorflow/core/tfrt/saved_model/saved_model.cc | 2 ++ tensorflow/core/tfrt/tfrt_session/tfrt_session.cc | 2 ++ 10 files changed, 26 insertions(+), 14 deletions(-) diff --git a/tensorflow/core/tfrt/fallback/fallback_state.h b/tensorflow/core/tfrt/fallback/fallback_state.h index ae5158018b9b67..cf293b1b406a28 100644 --- a/tensorflow/core/tfrt/fallback/fallback_state.h +++ b/tensorflow/core/tfrt/fallback/fallback_state.h @@ -62,6 +62,7 @@ class FallbackState { const SessionOptions &session_options() const { return session_options_; } const DeviceMgr &device_manager() const { return device_manager_; } + DeviceMgr &device_manager() { return device_manager_; } const DeviceSet &device_set() const { return device_set_; } diff --git a/tensorflow/core/tfrt/ifrt/ifrt_model_context.h b/tensorflow/core/tfrt/ifrt/ifrt_model_context.h index 9bf38b8c92cb74..20af882d634a7a 100644 --- a/tensorflow/core/tfrt/ifrt/ifrt_model_context.h +++ b/tensorflow/core/tfrt/ifrt/ifrt_model_context.h @@ -65,12 +65,12 @@ class 
IfrtModelContext { std::shared_ptr client, IfrtServingCoreSelector* ifrt_serving_core_selector, const tsl::thread::ThreadPool* thread_pool, - std::unique_ptr device_mgr, + tensorflow::DeviceMgr* device_mgr, tensorflow::XlaHelpers::ShapeRepresentationFn shape_representation_fn) : client_(std::move(client)), ifrt_serving_core_selector_(ifrt_serving_core_selector), thread_pool_(*thread_pool), - device_mgr_(std::move(device_mgr)), + device_mgr_(device_mgr), shape_representation_fn_(shape_representation_fn) {} void RegisterHandle(ServingExecutableRegistry::Handle handle) { @@ -100,9 +100,7 @@ class IfrtModelContext { return restore_tensor_registry_; } - tensorflow::StaticDeviceMgr* GetDeviceMgr() const { - return device_mgr_.get(); - } + tensorflow::DeviceMgr* GetDeviceMgr() const { return device_mgr_; } IfrtServingCoreSelector* GetIfrtServingCoreSelector() const { return ifrt_serving_core_selector_; } @@ -127,7 +125,7 @@ class IfrtModelContext { IfrtServingCoreSelector* ifrt_serving_core_selector_; // May be nullptr const tsl::thread::ThreadPool& thread_pool_; - std::unique_ptr device_mgr_; + tensorflow::DeviceMgr* device_mgr_; // Not owned. tensorflow::XlaHelpers::ShapeRepresentationFn shape_representation_fn_ = tensorflow::IdentityShapeRepresentationFn(); diff --git a/tensorflow/core/tfrt/ifrt/ifrt_serving_executable.cc b/tensorflow/core/tfrt/ifrt/ifrt_serving_executable.cc index b65b9c68eb1f78..822df677a97240 100644 --- a/tensorflow/core/tfrt/ifrt/ifrt_serving_executable.cc +++ b/tensorflow/core/tfrt/ifrt/ifrt_serving_executable.cc @@ -165,7 +165,7 @@ IfrtServingExecutable::Create( IfrtLoadedVariableRegistry* ifrt_loaded_variable_registry, const IfrtRestoreTensorRegistry* ifrt_restore, tfrt::ConcurrentWorkQueue* checkpoint_loader_queue, - tensorflow::StaticDeviceMgr* device_mgr, + tensorflow::DeviceMgr* device_mgr, tensorflow::XlaHelpers::ShapeRepresentationFn shape_representation_fn, IfrtServingCoreSelector* ifrt_serving_core_selector) { TF_ASSIGN_OR_RETURN( @@ -237,7 +237,7 @@ GroupHostCallbackByKey(const Tf2HloResult& tf2hlo_result) { // TODO: shape propagation in module absl::StatusOr BuildHostCallback( absl::string_view key, const HostCallbackBuilderInfo& builder_info, - mlir::ModuleOp module, tensorflow::StaticDeviceMgr* device_mgr, + mlir::ModuleOp module, tensorflow::DeviceMgr* device_mgr, std::vector>& tf_host_callbacks) { VLOG(2) << "BuildHostCallback for key: " << key; @@ -310,7 +310,7 @@ absl::StatusOr BuildHostCallback( absl::StatusOr> BuildHostCallbacks( const Tf2HloResult& tf2hlo_result, mlir::ModuleOp module, - tensorflow::StaticDeviceMgr* device_mgr, + tensorflow::DeviceMgr* device_mgr, std::vector>& tf_host_callbacks) { TF_ASSIGN_OR_RETURN(auto host_callback_maps, GroupHostCallbackByKey(tf2hlo_result)); diff --git a/tensorflow/core/tfrt/ifrt/ifrt_serving_executable.h b/tensorflow/core/tfrt/ifrt/ifrt_serving_executable.h index 5a336689073cd6..1d224d9345c9dd 100644 --- a/tensorflow/core/tfrt/ifrt/ifrt_serving_executable.h +++ b/tensorflow/core/tfrt/ifrt/ifrt_serving_executable.h @@ -70,7 +70,7 @@ class IfrtServingExecutable { IfrtLoadedVariableRegistry* ifrt_loaded_variable_registry, const IfrtRestoreTensorRegistry* ifrt_restore, tfrt::ConcurrentWorkQueue* checkpoint_loader_queue, - tensorflow::StaticDeviceMgr* device_mgr, + tensorflow::DeviceMgr* device_mgr, tensorflow::XlaHelpers::ShapeRepresentationFn shape_representation_fn, IfrtServingCoreSelector* ifrt_serving_core_selector); @@ -140,7 +140,7 @@ class IfrtServingExecutable { IfrtLoadedVariableRegistry* 
ifrt_loaded_variable_registry, const IfrtRestoreTensorRegistry* ifrt_restore_tensor_registry, tfrt::ConcurrentWorkQueue* checkpoint_loader_queue, - tensorflow::StaticDeviceMgr* device_mgr, + tensorflow::DeviceMgr* device_mgr, tensorflow::XlaHelpers::ShapeRepresentationFn shape_representation_fn, IfrtServingCoreSelector* ifrt_serving_core_selector, tensorflow::tpu::TPUCompileMetadataProto original_compile_metadata) @@ -176,7 +176,7 @@ class IfrtServingExecutable { IfrtLoadedVariableRegistry& ifrt_loaded_variable_registry_; const IfrtRestoreTensorRegistry& ifrt_restore_tensor_registry_; tfrt::ConcurrentWorkQueue* checkpoint_loader_queue_; - tensorflow::StaticDeviceMgr* device_mgr_; // Not owned. For host callback. + tensorflow::DeviceMgr* device_mgr_; // Not owned. For host callback. tensorflow::XlaHelpers::ShapeRepresentationFn shape_representation_fn_; IfrtServingCoreSelector* ifrt_serving_core_selector_; diff --git a/tensorflow/core/tfrt/ifrt/tf_host_callback.cc b/tensorflow/core/tfrt/ifrt/tf_host_callback.cc index 084d1ea1a3ec7a..5c5a48f4fc52b4 100644 --- a/tensorflow/core/tfrt/ifrt/tf_host_callback.cc +++ b/tensorflow/core/tfrt/ifrt/tf_host_callback.cc @@ -127,7 +127,7 @@ absl::StatusOr> TfHostCallback::Create( absl::string_view entry_function_name, absl::Span operand_type_and_shapes, absl::Span result_type_and_shapes, - tensorflow::StaticDeviceMgr* device_mgr) { + tensorflow::DeviceMgr* device_mgr) { tensorflow::SessionOptions options; // Explicitly disable non-CPU devices to avoid triggering TPU device // initialization inside TF. diff --git a/tensorflow/core/tfrt/ifrt/tf_host_callback.h b/tensorflow/core/tfrt/ifrt/tf_host_callback.h index ddb00050e8df58..a78b0e5d0aecea 100644 --- a/tensorflow/core/tfrt/ifrt/tf_host_callback.h +++ b/tensorflow/core/tfrt/ifrt/tf_host_callback.h @@ -45,7 +45,7 @@ class TfHostCallback { absl::string_view entry_function_name, absl::Span operand_type_and_shapes, absl::Span result_type_and_shapes, - tensorflow::StaticDeviceMgr* device_mgr); + tensorflow::DeviceMgr* device_mgr); // The host callback function takes two pointer arrays, each element of which // points to allocated host buffer in host layout according to corresponding diff --git a/tensorflow/core/tfrt/runtime/runtime.h b/tensorflow/core/tfrt/runtime/runtime.h index 830210d1426dab..1a6925c1535cc9 100644 --- a/tensorflow/core/tfrt/runtime/runtime.h +++ b/tensorflow/core/tfrt/runtime/runtime.h @@ -27,6 +27,8 @@ limitations under the License. 
#include "absl/log/log.h" #include "absl/status/status.h" #include "absl/strings/string_view.h" +#include "tensorflow/core/common_runtime/device_mgr.h" +#include "tensorflow/core/framework/device.h" #include "tensorflow/core/framework/function.h" #include "tensorflow/core/graph/graph.h" #include "tensorflow/core/platform/statusor.h" @@ -79,6 +81,11 @@ class ModelRuntimeContext { flib_def_ = flib_def; } + tensorflow::DeviceMgr* device_mgr() const { return device_mgr_; } + void set_device_mgr(tensorflow::DeviceMgr* device_mgr) { + device_mgr_ = device_mgr; + } + bool is_local_session() const { return is_local_session_; } void set_is_local_session(bool is_local_session) { @@ -104,6 +111,7 @@ class ModelRuntimeContext { const GraphDef* graph_def_ = nullptr; const CallableOptions* callable_options_ = nullptr; tfrt::ResourceContext* resource_context_ = nullptr; + tensorflow::DeviceMgr* device_mgr_ = nullptr; FunctionLibraryDefinition* flib_def_ = nullptr; diff --git a/tensorflow/core/tfrt/saved_model/BUILD b/tensorflow/core/tfrt/saved_model/BUILD index 3e49778bed8416..9e85c14baef362 100644 --- a/tensorflow/core/tfrt/saved_model/BUILD +++ b/tensorflow/core/tfrt/saved_model/BUILD @@ -118,6 +118,7 @@ cc_library( "//tensorflow/core:core_cpu_base", "//tensorflow/core:framework", "//tensorflow/core:lib", + "//tensorflow/core/common_runtime:device_mgr", "//tensorflow/core/framework:function_proto_cc", "//tensorflow/core/framework:graph_proto_cc", "//tensorflow/core/framework:tensor_proto_cc", diff --git a/tensorflow/core/tfrt/saved_model/saved_model.cc b/tensorflow/core/tfrt/saved_model/saved_model.cc index 85bc3fabc94fad..13ff33bb9e086c 100644 --- a/tensorflow/core/tfrt/saved_model/saved_model.cc +++ b/tensorflow/core/tfrt/saved_model/saved_model.cc @@ -577,6 +577,8 @@ absl::StatusOr> SavedModelImpl::LoadSavedModel( CombineSignatureDefs(meta_graph_def.signature_def()); model_context.set_graph_def(&meta_graph_def.graph_def()); model_context.set_callable_options(&callable_options); + model_context.set_device_mgr(&fallback_state->device_manager()); + TF_RETURN_IF_ERROR( options.graph_execution_options.runtime->CreateRuntimeResources( model_context)); diff --git a/tensorflow/core/tfrt/tfrt_session/tfrt_session.cc b/tensorflow/core/tfrt/tfrt_session/tfrt_session.cc index 8116c4aab1fce3..67475b0b23ed3e 100644 --- a/tensorflow/core/tfrt/tfrt_session/tfrt_session.cc +++ b/tensorflow/core/tfrt/tfrt_session/tfrt_session.cc @@ -35,6 +35,7 @@ limitations under the License. #include "Eigen/ThreadPool" // from @eigen_archive #include "llvm/ADT/STLExtras.h" #include "tensorflow/compiler/mlir/tfrt/translate/tfrt_compile_options.h" +#include "tensorflow/core/common_runtime/device_mgr.h" #include "tensorflow/core/common_runtime/local_session_selection.h" #include "tensorflow/core/common_runtime/process_util.h" #include "tensorflow/core/common_runtime/session_factory.h" @@ -218,6 +219,7 @@ class TfrtSession : public tensorflow::Session { &options, /*export_dir=*/"unknown_export_dir", resource_context.get()); // TODO(b/334641254): Offer a Session option that prunes the graph_def. model_context.set_graph_def(&graph); + model_context.set_device_mgr(&fallback_state->device_manager()); // In the multi-host case, this prevents local Sessions from running // global resource creation functions. 
model_context.set_is_local_session( From bde6f752c22ac8463971fa9b3292f4c95f26e5bb Mon Sep 17 00:00:00 2001 From: David Dunleavy Date: Tue, 14 May 2024 16:15:59 -0700 Subject: [PATCH 144/478] Remove references to unused tags in XLA build script PiperOrigin-RevId: 633738393 --- third_party/xla/.kokoro/linux/build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/third_party/xla/.kokoro/linux/build.sh b/third_party/xla/.kokoro/linux/build.sh index f9610594e5646d..f7e572c5046281 100644 --- a/third_party/xla/.kokoro/linux/build.sh +++ b/third_party/xla/.kokoro/linux/build.sh @@ -48,13 +48,13 @@ docker run --name xla -w /tf/xla -itd --rm \ "$DOCKER_IMAGE" \ bash -TAGS_FILTER="-no_oss,-oss_excluded,-oss_serial" +TAGS_FILTER="-no_oss" ADDITIONAL_FLAGS="" RBE_FLAGS="" TARGET_FILTERS="-@local_tsl//tsl/platform:subprocess_test -@local_tsl//tsl/platform/cloud:google_auth_provider_test -@local_tsl//tsl/platform/cloud:oauth_client_test" if is_linux_gpu_job ; then - TAGS_FILTER="$TAGS_FILTER,requires-gpu-nvidia,-no_gpu" + TAGS_FILTER="$TAGS_FILTER,requires-gpu-nvidia" # We are currently running XLA presubmits on machines with NVIDIA T4 GPUs, # which have a compute compatibility of 7.5. Se we filter out all the tests From c7bb3fbf20089e8ee17405650cca4c022d491717 Mon Sep 17 00:00:00 2001 From: Kyle Lucke Date: Tue, 14 May 2024 16:21:09 -0700 Subject: [PATCH 145/478] Clean unnecessary include and definition from kernel.h. PiperOrigin-RevId: 633739642 --- third_party/xla/xla/stream_executor/BUILD | 2 +- third_party/xla/xla/stream_executor/gpu/BUILD | 1 + .../xla/xla/stream_executor/gpu/stream_search_test.cc | 1 + third_party/xla/xla/stream_executor/kernel.cc | 7 ------- third_party/xla/xla/stream_executor/kernel.h | 2 -- third_party/xla/xla/stream_executor/stream.cc | 1 + 6 files changed, 4 insertions(+), 10 deletions(-) diff --git a/third_party/xla/xla/stream_executor/BUILD b/third_party/xla/xla/stream_executor/BUILD index d41de1e0ff7032..7137b54991ad52 100644 --- a/third_party/xla/xla/stream_executor/BUILD +++ b/third_party/xla/xla/stream_executor/BUILD @@ -605,8 +605,8 @@ cc_library( deps = [ ":device_memory", ":kernel_spec", + ":launch_dim", ":platform", - ":stream_executor_headers", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/container:inlined_vector", "@com_google_absl//absl/meta:type_traits", diff --git a/third_party/xla/xla/stream_executor/gpu/BUILD b/third_party/xla/xla/stream_executor/gpu/BUILD index 619a5bb034ab76..b2bd00c972d26b 100644 --- a/third_party/xla/xla/stream_executor/gpu/BUILD +++ b/third_party/xla/xla/stream_executor/gpu/BUILD @@ -753,6 +753,7 @@ xla_cc_test( "//xla/stream_executor", "//xla/stream_executor/host:host_platform", "@com_google_absl//absl/status:statusor", + "@local_tsl//tsl/platform:statusor", "@local_tsl//tsl/platform:test", "@local_tsl//tsl/platform:test_main", ] + if_cuda([ diff --git a/third_party/xla/xla/stream_executor/gpu/stream_search_test.cc b/third_party/xla/xla/stream_executor/gpu/stream_search_test.cc index 9f91c63ae0d972..c0f66159400039 100644 --- a/third_party/xla/xla/stream_executor/gpu/stream_search_test.cc +++ b/third_party/xla/xla/stream_executor/gpu/stream_search_test.cc @@ -17,6 +17,7 @@ limitations under the License. 
#include "xla/stream_executor/platform.h" #include "xla/stream_executor/platform_manager.h" #include "xla/stream_executor/stream_executor.h" +#include "tsl/platform/statusor.h" #include "tsl/platform/test.h" namespace stream_executor { diff --git a/third_party/xla/xla/stream_executor/kernel.cc b/third_party/xla/xla/stream_executor/kernel.cc index 81767a7fe5025b..8f40daee24f284 100644 --- a/third_party/xla/xla/stream_executor/kernel.cc +++ b/third_party/xla/xla/stream_executor/kernel.cc @@ -16,19 +16,12 @@ limitations under the License. #include "xla/stream_executor/kernel.h" #include -#include #include #include -#include "absl/status/statusor.h" #include "absl/strings/string_view.h" #include "absl/strings/strip.h" -#include "xla/stream_executor/kernel_spec.h" -#include "xla/stream_executor/platform.h" -#include "xla/stream_executor/stream_executor.h" #include "tsl/platform/demangle.h" -#include "tsl/platform/errors.h" -#include "tsl/platform/statusor.h" namespace stream_executor { diff --git a/third_party/xla/xla/stream_executor/kernel.h b/third_party/xla/xla/stream_executor/kernel.h index 98de009e1b8a16..0f51a6ff447eb2 100644 --- a/third_party/xla/xla/stream_executor/kernel.h +++ b/third_party/xla/xla/stream_executor/kernel.h @@ -92,12 +92,10 @@ limitations under the License. #include "xla/stream_executor/kernel_spec.h" #include "xla/stream_executor/launch_dim.h" #include "tsl/platform/logging.h" -#include "tsl/platform/statusor.h" namespace stream_executor { class Kernel; -class StreamExecutorInterface; //===----------------------------------------------------------------------===// // Kernel cache config diff --git a/third_party/xla/xla/stream_executor/stream.cc b/third_party/xla/xla/stream_executor/stream.cc index dbb0ca213cced9..0c611032332b06 100644 --- a/third_party/xla/xla/stream_executor/stream.cc +++ b/third_party/xla/xla/stream_executor/stream.cc @@ -38,6 +38,7 @@ limitations under the License. #include "tsl/platform/errors.h" #include "tsl/platform/logging.h" #include "tsl/platform/stacktrace.h" +#include "tsl/platform/statusor.h" namespace stream_executor { From 244464154460be6e75e53ca8f69c965d256beb73 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 May 2024 16:46:46 -0700 Subject: [PATCH 146/478] Expands the test coverage of GetReducedIntervals(), since this is now the preferred way of interacting with the Memory Term Reducer. 
PiperOrigin-RevId: 633746142 --- .../auto_sharding_memory_test.cc | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/third_party/xla/xla/hlo/experimental/auto_sharding/auto_sharding_memory_test.cc b/third_party/xla/xla/hlo/experimental/auto_sharding/auto_sharding_memory_test.cc index 4128d683c1dc7d..cfa7b4d2af23ee 100644 --- a/third_party/xla/xla/hlo/experimental/auto_sharding/auto_sharding_memory_test.cc +++ b/third_party/xla/xla/hlo/experimental/auto_sharding/auto_sharding_memory_test.cc @@ -68,10 +68,13 @@ TEST(AutoShardingMemoryTest, WithoutOverlap) { { 1}, { 1}, { 1}}; + const std::vector> expected_reduced_intervals = + {{0, 2}, {3, 5}}; const std::vector> expected_reduced_groups = {}; const std::pair expected_num_terms = {6, 6}; EXPECT_EQ(num_terms, expected_num_terms); EXPECT_EQ(reducer.GetReducedLive(), expected_reduced_live); + EXPECT_EQ(reducer.GetReducedIntervals(), expected_reduced_intervals); EXPECT_EQ(reducer.GetReducedGroups(), expected_reduced_groups); } @@ -100,11 +103,14 @@ TEST(AutoShardingMemoryTest, PartialOverlap) { { 2}, { 2}, { 1 }}; + const std::vector> expected_reduced_intervals = + {{0, 0}, {5, 5}, {1, 4}}; const std::vector> expected_reduced_groups = {{0, 1}}; const std::pair expected_num_terms = {10, 8}; EXPECT_EQ(num_terms, expected_num_terms); EXPECT_EQ(reducer.GetReducedLive(), expected_reduced_live); + EXPECT_EQ(reducer.GetReducedIntervals(), expected_reduced_intervals); EXPECT_EQ(reducer.GetReducedGroups(), expected_reduced_groups); } @@ -133,11 +139,14 @@ TEST(AutoShardingMemoryTest, PartialOverlapReversed) { { 2}, { 2}, {0 }}; + const std::vector> expected_reduced_intervals = + {{5, 5}, {0, 0}, {1, 4}}; const std::vector> expected_reduced_groups = {{0, 1}}; const std::pair expected_num_terms = {10, 8}; EXPECT_EQ(num_terms, expected_num_terms); EXPECT_EQ(reducer.GetReducedLive(), expected_reduced_live); + EXPECT_EQ(reducer.GetReducedIntervals(), expected_reduced_intervals); EXPECT_EQ(reducer.GetReducedGroups(), expected_reduced_groups); } @@ -165,10 +174,13 @@ TEST(AutoShardingMemoryTest, DoesNotSplitPrimitive) { {0, 1}, {0, 1}, {0 }}; + const std::vector> expected_reduced_intervals = + {{0, 5}, {1, 4}}; const std::vector> expected_reduced_groups = {}; const std::pair expected_num_terms = {10, 10}; EXPECT_EQ(num_terms, expected_num_terms); EXPECT_EQ(reducer.GetReducedLive(), expected_reduced_live); + EXPECT_EQ(reducer.GetReducedIntervals(), expected_reduced_intervals); EXPECT_EQ(reducer.GetReducedGroups(), expected_reduced_groups); } @@ -197,11 +209,14 @@ TEST(AutoShardingMemoryTest, OnePrimitiveVanishes) { { 2}, { 2}, { 2}}; + const std::vector> expected_reduced_intervals = + {{0, 0}, {6, 0}, {1, 5}}; const std::vector> expected_reduced_groups = {{0, 1}}; const std::pair expected_num_terms = {11, 8}; EXPECT_EQ(num_terms, expected_num_terms); EXPECT_EQ(reducer.GetReducedLive(), expected_reduced_live); + EXPECT_EQ(reducer.GetReducedIntervals(), expected_reduced_intervals); EXPECT_EQ(reducer.GetReducedGroups(), expected_reduced_groups); } @@ -230,11 +245,14 @@ TEST(AutoShardingMemoryTest, BothPrimitivesVanish) { {2}, {2}, {2}}; + const std::vector> expected_reduced_intervals = + {{6, -1}, {6, -1}, {0, 5}}; const std::vector> expected_reduced_groups = {{0, 1}}; const std::pair expected_num_terms = {12, 8}; EXPECT_EQ(num_terms, expected_num_terms); EXPECT_EQ(reducer.GetReducedLive(), expected_reduced_live); + EXPECT_EQ(reducer.GetReducedIntervals(), expected_reduced_intervals); EXPECT_EQ(reducer.GetReducedGroups(), 
expected_reduced_groups); } @@ -270,11 +288,14 @@ TEST(AutoShardingMemoryTest, OneGroupingPreventsAnother) { {1, 2 }, {1, 2 }, { 2 }}; + const std::vector> expected_reduced_intervals = + {{5, -1}, {5, 7}, {4, 8}, {0, 4}}; const std::vector> expected_reduced_groups = {{0, 1}}; const std::pair expected_num_terms = {18, 15}; EXPECT_EQ(num_terms, expected_num_terms); EXPECT_EQ(reducer.GetReducedLive(), expected_reduced_live); + EXPECT_EQ(reducer.GetReducedIntervals(), expected_reduced_intervals); EXPECT_EQ(reducer.GetReducedGroups(), expected_reduced_groups); } @@ -304,11 +325,14 @@ TEST(AutoShardingMemoryTest, TwoGroups) { {4}, {4}, {4}}; + const std::vector> expected_reduced_intervals = + {{6, 2}, {3, -1}, {6, 2}, {0, 2}, {3, 5}}; const std::vector> expected_reduced_groups = {{0, 1}, {0, 2}}; const std::pair expected_num_terms = {12, 10}; EXPECT_EQ(num_terms, expected_num_terms); EXPECT_EQ(reducer.GetReducedLive(), expected_reduced_live); + EXPECT_EQ(reducer.GetReducedIntervals(), expected_reduced_intervals); EXPECT_EQ(reducer.GetReducedGroups(), expected_reduced_groups); } @@ -343,11 +367,14 @@ TEST(AutoShardingMemoryTest, TwoGroupsMutuallyExclusive) { { 5}, { 5}, { 3 }}; + const std::vector> expected_reduced_intervals = + {{0, 0}, {4, 0}, {7, 3}, {7, 7}, {1, 3}, {4, 6}}; const std::vector> expected_reduced_groups = {{0, 1}, {2, 3}}; const std::pair expected_num_terms = {14, 12}; EXPECT_EQ(num_terms, expected_num_terms); EXPECT_EQ(reducer.GetReducedLive(), expected_reduced_live); + EXPECT_EQ(reducer.GetReducedIntervals(), expected_reduced_intervals); EXPECT_EQ(reducer.GetReducedGroups(), expected_reduced_groups); } @@ -367,10 +394,13 @@ TEST(AutoShardingMemoryTest, MergingPrimitivesWouldNotReduceTerms) { const std::vector> expected_reduced_live = {{0, 1}, {0, 1}}; + const std::vector> expected_reduced_intervals = + {{0, 1}, {0, 1}}; const std::vector> expected_reduced_groups = {}; const std::pair expected_num_terms = {4, 4}; EXPECT_EQ(num_terms, expected_num_terms); EXPECT_EQ(reducer.GetReducedLive(), expected_reduced_live); + EXPECT_EQ(reducer.GetReducedIntervals(), expected_reduced_intervals); EXPECT_EQ(reducer.GetReducedGroups(), expected_reduced_groups); } @@ -400,11 +430,14 @@ TEST(AutoShardingMemoryTest, AllPrimitivesVanish) { {3}, {3}, {3}}; + const std::vector> expected_reduced_intervals = + {{6, -1}, {6, -1}, {6, -1}, {0, 5}}; const std::vector> expected_reduced_groups = {{0, 1, 2}}; const std::pair expected_num_terms = {18, 9}; EXPECT_EQ(num_terms, expected_num_terms); EXPECT_EQ(reducer.GetReducedLive(), expected_reduced_live); + EXPECT_EQ(reducer.GetReducedIntervals(), expected_reduced_intervals); EXPECT_EQ(reducer.GetReducedGroups(), expected_reduced_groups); } @@ -442,11 +475,14 @@ TEST(AutoShardingMemoryTest, MergingGroupsWouldNotReduceTerms) { {4, 5}, { 5}, { 5}}; + const std::vector> expected_reduced_intervals = + {{7, -1}, {7, -1}, {9, 2}, {9, 2}, {0, 6}, {3, 8}}; const std::vector> expected_reduced_groups = {{0, 1}, {2, 3}}; const std::pair expected_num_terms = {26, 17}; EXPECT_EQ(num_terms, expected_num_terms); EXPECT_EQ(reducer.GetReducedLive(), expected_reduced_live); + EXPECT_EQ(reducer.GetReducedIntervals(), expected_reduced_intervals); EXPECT_EQ(reducer.GetReducedGroups(), expected_reduced_groups); } @@ -493,11 +529,14 @@ TEST(AutoShardingMemoryTest, ExampleFromDocumentation) { { 5}, { 5}, { 3 }}; + const std::vector> expected_reduced_intervals = + {{0, 0}, {10, 0}, {13, 4}, {13, 13}, {1, 4}, {10, 12}, {5, 9}}; const std::vector> expected_reduced_groups = {{0, 1}, 
{2, 3}, {0, 1, 2, 3}}; const std::pair expected_num_terms = {36, 22}; EXPECT_EQ(num_terms, expected_num_terms); EXPECT_EQ(reducer.GetReducedLive(), expected_reduced_live); + EXPECT_EQ(reducer.GetReducedIntervals(), expected_reduced_intervals); EXPECT_EQ(reducer.GetReducedGroups(), expected_reduced_groups); } @@ -523,11 +562,14 @@ TEST(AutoShardingMemoryTest, MergesWithRightmost) { { 3}, { 3}, {1, 2 }}; + const std::vector> expected_reduced_intervals = + {{3, -1}, {3, 3}, {3, 3}, {0, 2}}; const std::vector> expected_reduced_groups = {{0, 2}}; const std::pair expected_num_terms = {8, 7}; EXPECT_EQ(num_terms, expected_num_terms); EXPECT_EQ(reducer.GetReducedLive(), expected_reduced_live); + EXPECT_EQ(reducer.GetReducedIntervals(), expected_reduced_intervals); EXPECT_EQ(reducer.GetReducedGroups(), expected_reduced_groups); } From 5938029cd2670e6aa29953de9aea7a88fb598fa0 Mon Sep 17 00:00:00 2001 From: Kuy Mainwaring Date: Tue, 14 May 2024 16:52:32 -0700 Subject: [PATCH 147/478] [XLA:GPU] Clang-tidy cleanup for xla/service/gpu/... for const-correctness PiperOrigin-RevId: 633747545 --- third_party/xla/xla/service/gpu/cublas_pad_for_gemms.cc | 2 +- third_party/xla/xla/service/gpu/cudnn_fused_mha_rewriter.cc | 5 +++-- .../xla/xla/service/gpu/cudnn_fused_mha_transpose_fusion.cc | 2 -- third_party/xla/xla/service/gpu/gpu_conv_rewriter.cc | 2 +- .../xla/service/gpu/runtime/nccl_collective_permute_thunk.cc | 4 ++-- 5 files changed, 7 insertions(+), 8 deletions(-) diff --git a/third_party/xla/xla/service/gpu/cublas_pad_for_gemms.cc b/third_party/xla/xla/service/gpu/cublas_pad_for_gemms.cc index 050f219d12b6c8..a90981f1c932fb 100644 --- a/third_party/xla/xla/service/gpu/cublas_pad_for_gemms.cc +++ b/third_party/xla/xla/service/gpu/cublas_pad_for_gemms.cc @@ -133,7 +133,7 @@ namespace { // We need this check because PadForGemm works in the assumption that // the dot instruction is canonicalized. 
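// For reference (an inference from the check below, not text from the
// original file): canonical form means each operand carries exactly the
// batch dimensions plus two matrix dimensions, so operand rank must equal
// lhs_batch_dimensions_size() + 2.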
bool CheckCanonical(HloDotInstruction* dot) { - auto dimension_numbers = dot->dot_dimension_numbers(); + const auto& dimension_numbers = dot->dot_dimension_numbers(); if (dimension_numbers.lhs_batch_dimensions_size() + 2 != dot->operand(0)->shape().rank() || diff --git a/third_party/xla/xla/service/gpu/cudnn_fused_mha_rewriter.cc b/third_party/xla/xla/service/gpu/cudnn_fused_mha_rewriter.cc index b25cfe8d7cbe55..0dea3331bc11cd 100644 --- a/third_party/xla/xla/service/gpu/cudnn_fused_mha_rewriter.cc +++ b/third_party/xla/xla/service/gpu/cudnn_fused_mha_rewriter.cc @@ -1344,7 +1344,7 @@ absl::StatusOr FuseFwdMultiHeadedAttentionBlock( bcast_dimensions.push_back(dim - starting_index); } - Shape bcast_shape = bmm_1->shape(); + const Shape& bcast_shape = bmm_1->shape(); bias = comp->AddInstruction(HloInstruction::CreateBroadcast( bcast_shape, original_bias, bcast_dimensions)); } @@ -1402,7 +1402,8 @@ absl::StatusOr FuseBwdMultiHeadedAttentionBlock( TF_ASSIGN_OR_RETURN(GpuBackendConfig gpu_config, fwd_fmha_call->backend_config()); - CudnnfMHABackendConfig fwd_config = gpu_config.cudnn_fmha_backend_config(); + const CudnnfMHABackendConfig& fwd_config = + gpu_config.cudnn_fmha_backend_config(); bool is_causal_mask = fwd_config.mask_type() == CudnnfMHABackendConfig::CAUSAL; CudnnfMHABackendConfig bwd_fmha_config; diff --git a/third_party/xla/xla/service/gpu/cudnn_fused_mha_transpose_fusion.cc b/third_party/xla/xla/service/gpu/cudnn_fused_mha_transpose_fusion.cc index 81074a29638b9e..665cc0bf824383 100644 --- a/third_party/xla/xla/service/gpu/cudnn_fused_mha_transpose_fusion.cc +++ b/third_party/xla/xla/service/gpu/cudnn_fused_mha_transpose_fusion.cc @@ -463,8 +463,6 @@ absl::StatusOr FusePrologueTransposeWithcuDNNFMHA(HloComputation* comp) { // should have the same layout of O TF_ASSIGN_OR_RETURN(auto gpu_config, fmha->backend_config()); - const CudnnfMHABackendConfig config = - gpu_config.cudnn_fmha_backend_config(); if (changed && VLOG_IS_ON(2)) { VLOG(2) << "After CudnnFusedMHATransposeFusion Arg 4: \n" << comp->parent()->ToString(); diff --git a/third_party/xla/xla/service/gpu/gpu_conv_rewriter.cc b/third_party/xla/xla/service/gpu/gpu_conv_rewriter.cc index 8ba1af16338870..b8dbf25879db5a 100644 --- a/third_party/xla/xla/service/gpu/gpu_conv_rewriter.cc +++ b/third_party/xla/xla/service/gpu/gpu_conv_rewriter.cc @@ -65,7 +65,7 @@ bool MaybeConv1dToConv2d(HloInstruction* conv) { if (reshape_degenerate.has_value() && reshape_degenerate->deleted_dimensions.empty() && reshape_degenerate->inserted_dimensions.size() == 1) { - auto dnums = conv->convolution_dimension_numbers(); + const auto& dnums = conv->convolution_dimension_numbers(); for (auto dim : dnums.kernel_spatial_dimensions()) { if (dim == reshape_degenerate->inserted_dimensions[0]) { return true; diff --git a/third_party/xla/xla/service/gpu/runtime/nccl_collective_permute_thunk.cc b/third_party/xla/xla/service/gpu/runtime/nccl_collective_permute_thunk.cc index 804c30b42e3690..02a8a583d754ef 100644 --- a/third_party/xla/xla/service/gpu/runtime/nccl_collective_permute_thunk.cc +++ b/third_party/xla/xla/service/gpu/runtime/nccl_collective_permute_thunk.cc @@ -89,7 +89,7 @@ NcclCollectivePermuteStartThunk::NcclCollectivePermuteStartThunk( replica_group.add_replica_ids(i); } - const std::vector> source_target_pairs = + const std::vector>& source_target_pairs = instr->source_target_pairs(); for (const std::pair& source_target : source_target_pairs) { @@ -110,7 +110,7 @@ NcclCollectivePermuteStartThunk::NcclCollectivePermuteStartThunk( int64_t 
partition_count) { // The collective permute is degenerate if all source-target pairs are // identity, and all the IDs appear in the list. - const std::vector> source_target_pairs = + const std::vector>& source_target_pairs = instr->source_target_pairs(); // Each ID can appear only once as a source and as a target. So if all pairs // are identity, all IDs must appear in the list is the size == number of From 5f64c63ba91479f6343be97ee4c60cb7eadfb4dd Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 14 May 2024 16:57:21 -0700 Subject: [PATCH 148/478] Integrate LLVM at llvm/llvm-project@f89b1b8a6806 Updates LLVM usage to match [f89b1b8a6806](https://github.com/llvm/llvm-project/commit/f89b1b8a6806) PiperOrigin-RevId: 633748740 --- third_party/llvm/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/third_party/llvm/workspace.bzl b/third_party/llvm/workspace.bzl index 8d2c0c7e5a2254..a7d6c7b299393d 100644 --- a/third_party/llvm/workspace.bzl +++ b/third_party/llvm/workspace.bzl @@ -4,8 +4,8 @@ load("//third_party:repo.bzl", "tf_http_archive") def repo(name): """Imports LLVM.""" - LLVM_COMMIT = "e6d3a4212d20b49a8e63f11fedea79cccf261479" - LLVM_SHA256 = "a9581601d91836d182180e35a698550b9c8257eacead9e5cc7ec956128200fce" + LLVM_COMMIT = "f89b1b8a68065c4b880417abb0563bce21399b52" + LLVM_SHA256 = "eba609768a9fbce79c7096d156fc96e999296cc61aca9ca736df7a9c5236215c" tf_http_archive( name = name, From 161d89a2856b920645467a7243843461db121195 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 May 2024 17:19:42 -0700 Subject: [PATCH 149/478] Update ops-related pbtxt files. PiperOrigin-RevId: 633754444 --- tensorflow/core/ops/ops.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index cdc0191b8ad6c9..4b2857a8994764 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -1,4 +1,4 @@ -go/debugproto +go/debugonly op { name: "Abort" attr { From 102e08c2c67113289b4ac1d60fd8c16ce2f401b9 Mon Sep 17 00:00:00 2001 From: Yang Chen Date: Tue, 14 May 2024 17:52:44 -0700 Subject: [PATCH 150/478] #tf-data Support random access for the unbatch dataset. 
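Random access relies on the fixed batch size that drop_remainder=True
guarantees: element index of the unbatched dataset is row
index % batch_size of input batch index / batch_size. A minimal standalone
sketch of that index arithmetic (an illustrative helper mirroring the kernel
change; it is not part of the change itself):

    // Maps an unbatched element index to (input batch, row within batch),
    // assuming a known batch_size > 0.
    std::pair<int64_t, int64_t> ToBatchedIndex(int64_t index,
                                               int64_t batch_size) {
      return {index / batch_size, index % batch_size};
    }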
PiperOrigin-RevId: 633761597 --- .../core/kernels/data/experimental/BUILD | 3 +- .../data/experimental/unbatch_dataset_op.cc | 28 +++++++++++++++++ tensorflow/python/data/kernel_tests/BUILD | 1 + .../python/data/kernel_tests/unbatch_test.py | 30 +++++++++++++++++++ 4 files changed, 61 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/data/experimental/BUILD b/tensorflow/core/kernels/data/experimental/BUILD index f24cff66f1ef98..5adbe7abe25ee8 100644 --- a/tensorflow/core/kernels/data/experimental/BUILD +++ b/tensorflow/core/kernels/data/experimental/BUILD @@ -922,8 +922,9 @@ tf_kernel_library( "//tensorflow/core:framework_internal", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", - "//tensorflow/core/data:dataset_utils", "//tensorflow/core/framework:dataset_options_proto_cc", + "@com_google_absl//absl/status", + "@com_google_absl//absl/strings", "@local_tsl//tsl/platform:errors", "@local_tsl//tsl/platform:mutex", "@local_tsl//tsl/platform:status", diff --git a/tensorflow/core/kernels/data/experimental/unbatch_dataset_op.cc b/tensorflow/core/kernels/data/experimental/unbatch_dataset_op.cc index 4b4433ca27b27d..3cc332024328e8 100644 --- a/tensorflow/core/kernels/data/experimental/unbatch_dataset_op.cc +++ b/tensorflow/core/kernels/data/experimental/unbatch_dataset_op.cc @@ -19,6 +19,8 @@ limitations under the License. #include #include +#include "absl/status/status.h" +#include "absl/strings/str_cat.h" #include "tensorflow/core/framework/dataset.h" #include "tensorflow/core/framework/dataset_options.pb.h" #include "tensorflow/core/framework/model.h" @@ -122,6 +124,32 @@ class UnbatchDatasetOp : public UnaryDatasetOpKernel { return input_->CheckExternalState(); } + absl::Status Get(OpKernelContext* ctx, int64_t index, + std::vector* out_tensors) const override { + TF_RETURN_IF_ERROR(CheckRandomAccessCompatible(index)); + if (batch_size_ <= 0) { + return absl::FailedPreconditionError(absl::StrCat( + "Random access for the `unbatch` dataset requires a known batch " + "size. 
Got ", + batch_size_, ".")); + } + + const int64_t input_index = index / batch_size_; + const int64_t input_offset = index % batch_size_; + std::vector input_tensors; + TF_RETURN_IF_ERROR(input_->Get(ctx, input_index, &input_tensors)); + for (int64_t i = 0; i < input_tensors.size(); ++i) { + const DataType& dtype = input_tensors[i].dtype(); + TensorShape shape = input_tensors[i].shape(); + shape.RemoveDim(0); + + out_tensors->emplace_back(ctx->get_allocator({}), dtype, shape); + TF_RETURN_IF_ERROR(batch_util::MaybeMoveSliceToElement( + &input_tensors[i], &out_tensors->back(), input_offset)); + } + return absl::OkStatus(); + } + protected: Status AsGraphDefInternal(SerializationContext* ctx, DatasetGraphDefBuilder* b, diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD index dc0d785798622b..da147e0f39435e 100644 --- a/tensorflow/python/data/kernel_tests/BUILD +++ b/tensorflow/python/data/kernel_tests/BUILD @@ -1360,6 +1360,7 @@ tf_py_strict_test( deps = [ ":checkpoint_test_base", ":test_base", + "//tensorflow/python/data/experimental/ops:random_access", "//tensorflow/python/data/ops:dataset_ops", "//tensorflow/python/data/ops:options", "//tensorflow/python/framework:combinations", diff --git a/tensorflow/python/data/kernel_tests/unbatch_test.py b/tensorflow/python/data/kernel_tests/unbatch_test.py index d86075cd2b73bf..a65c6a2f2f393d 100644 --- a/tensorflow/python/data/kernel_tests/unbatch_test.py +++ b/tensorflow/python/data/kernel_tests/unbatch_test.py @@ -16,6 +16,7 @@ from absl.testing import parameterized import numpy as np +from tensorflow.python.data.experimental.ops import random_access from tensorflow.python.data.kernel_tests import checkpoint_test_base from tensorflow.python.data.kernel_tests import test_base from tensorflow.python.data.ops import dataset_ops @@ -262,5 +263,34 @@ def test(self, verify_fn, symbolic_checkpoint): num_outputs) +class UnbatchRandomAccessTest(test_base.DatasetTestBase, + parameterized.TestCase): + @combinations.generate(test_base.default_test_combinations()) + def test(self): + dataset = dataset_ops.Dataset.range(10) + dataset = dataset.batch(4, drop_remainder=True) + dataset = dataset.unbatch() + for i in range(8): + self.assertEqual(self.evaluate(random_access.at(dataset, i)), i) + + @combinations.generate(test_base.default_test_combinations()) + def testNotDropRemainder(self): + dataset = dataset_ops.Dataset.range(10) + dataset = dataset.batch(4, drop_remainder=False) + dataset = dataset.unbatch() + with self.assertRaises(errors.FailedPreconditionError): + self.evaluate(random_access.at(dataset, 0)) + + @combinations.generate( + combinations.times(test_base.default_test_combinations(), + combinations.combine(index=[-1, 100]))) + def testInvalidIndex(self, index): + dataset = dataset_ops.Dataset.range(10) + dataset = dataset.batch(4, drop_remainder=True) + dataset = dataset.unbatch() + with self.assertRaises(errors.OutOfRangeError): + self.evaluate(random_access.at(dataset, index=index)) + + if __name__ == "__main__": test.main() From f6b72954734f8304bfb83228bd8406a3ba3394f4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 May 2024 19:17:56 -0700 Subject: [PATCH 151/478] Update ops-related pbtxt files. 
PiperOrigin-RevId: 633777654 --- tensorflow/core/ops/ops.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 4b2857a8994764..9dc272e0a4f6ed 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -1,4 +1,4 @@ -go/debugonly +go/debugproto op { name: "Abort" attr { From 4f02320c5b379ff7a9f4cc9b665f1937a27d9a47 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 May 2024 21:17:42 -0700 Subject: [PATCH 152/478] Update ops-related pbtxt files. PiperOrigin-RevId: 633801239 --- tensorflow/core/ops/ops.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 9dc272e0a4f6ed..4b2857a8994764 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -1,4 +1,4 @@ -go/debugproto +go/debugonly op { name: "Abort" attr { From f34cf56711a673edbf082d42a318626815a40a86 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 May 2024 22:04:15 -0700 Subject: [PATCH 153/478] Automated Code Change PiperOrigin-RevId: 633809834 --- third_party/xla/xla/BUILD | 5 ++++- third_party/xla/xla/frontend_attributes.cc | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/third_party/xla/xla/BUILD b/third_party/xla/xla/BUILD index 2219ec3a37e78c..304ed5a4d2b031 100644 --- a/third_party/xla/xla/BUILD +++ b/third_party/xla/xla/BUILD @@ -224,7 +224,10 @@ cc_library( "frontend_attributes.h", ], visibility = internal_visibility([":friends"]), - deps = ["//xla/hlo/ir:hlo"], + deps = [ + ":xla_data_proto_cc", + "//xla/hlo/ir:hlo", + ], ) cc_library( diff --git a/third_party/xla/xla/frontend_attributes.cc b/third_party/xla/xla/frontend_attributes.cc index 8831040f89c15c..53ee0a4d1f1643 100644 --- a/third_party/xla/xla/frontend_attributes.cc +++ b/third_party/xla/xla/frontend_attributes.cc @@ -14,6 +14,9 @@ limitations under the License. ==============================================================================*/ #include "xla/frontend_attributes.h" +#include "xla/hlo/ir/hlo_instruction.h" +#include "xla/xla_data.pb.h" + namespace xla { void SetDisjointReadWriteRegionsAttr(HloInstruction* instruction) { From bf42cf1551e1cfd0226ae9db1e79818f720eb317 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 14 May 2024 22:41:38 -0700 Subject: [PATCH 154/478] Automated Code Change PiperOrigin-RevId: 633817088 --- tensorflow/tools/benchmark/BUILD | 8 +++++++- tensorflow/tools/benchmark/benchmark_model.cc | 17 +++++++++++++---- tensorflow/tools/benchmark/benchmark_model.h | 4 ++++ .../tools/benchmark/benchmark_model_test.cc | 18 +++++++++++++----- 4 files changed, 37 insertions(+), 10 deletions(-) diff --git a/tensorflow/tools/benchmark/BUILD b/tensorflow/tools/benchmark/BUILD index dbac33541f0872..cdfb36c3a3eae2 100644 --- a/tensorflow/tools/benchmark/BUILD +++ b/tensorflow/tools/benchmark/BUILD @@ -40,7 +40,11 @@ cc_library( "//tensorflow/core:tensorflow", "//tensorflow/core:test", ], - }), + }) + [ + "//tensorflow/core/platform:numbers", + "//tensorflow/core/util:stats_calculator_portable", + "@local_tsl//tsl/platform:status", + ], ) tf_cc_test( @@ -50,9 +54,11 @@ tf_cc_test( deps = [ ":benchmark_model_lib", "//tensorflow/cc:cc_ops", + "//tensorflow/cc:scope", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core:testlib", diff --git a/tensorflow/tools/benchmark/benchmark_model.cc b/tensorflow/tools/benchmark/benchmark_model.cc index f0a97c2170a3fc..b687d725dccf06 100644 --- a/tensorflow/tools/benchmark/benchmark_model.cc +++ b/tensorflow/tools/benchmark/benchmark_model.cc @@ -26,25 +26,34 @@ limitations under the License. #include #include -#include "tensorflow/core/common_runtime/graph_constructor.h" #include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/numeric_types.h" #include "tensorflow/core/framework/step_stats.pb.h" #include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/graph/algorithm.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/graph/graph.h" -#include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/init_main.h" #include "tensorflow/core/platform/logging.h" -#include "tensorflow/core/platform/platform.h" +#include "tensorflow/core/platform/numbers.h" +#include "tensorflow/core/platform/status.h" +#include "tensorflow/core/platform/tstring.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/protobuf/config.pb.h" #include "tensorflow/core/public/session.h" +#include "tensorflow/core/public/session_options.h" #include "tensorflow/core/util/command_line_flags.h" #include "tensorflow/core/util/reporter.h" #include "tensorflow/core/util/stat_summarizer.h" +#include "tensorflow/core/util/stat_summarizer_options.h" +#include "tensorflow/core/util/stats_calculator.h" +#include "tsl/platform/errors.h" +#include "tsl/platform/status.h" namespace tensorflow { namespace benchmark_model { diff --git a/tensorflow/tools/benchmark/benchmark_model.h b/tensorflow/tools/benchmark/benchmark_model.h index 8211605ace6dea..e983ea4167d740 100644 --- a/tensorflow/tools/benchmark/benchmark_model.h +++ b/tensorflow/tools/benchmark/benchmark_model.h @@ -16,6 +16,10 @@ limitations under the License. 
#ifndef TENSORFLOW_TOOLS_BENCHMARK_BENCHMARK_MODEL_H_ #define TENSORFLOW_TOOLS_BENCHMARK_BENCHMARK_MODEL_H_ +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/platform/status.h" +#include "tensorflow/core/platform/types.h" #include "tensorflow/core/public/session.h" #include "tensorflow/core/util/stat_summarizer.h" diff --git a/tensorflow/tools/benchmark/benchmark_model_test.cc b/tensorflow/tools/benchmark/benchmark_model_test.cc index c2a953ec91f7a7..c6e42840c6a689 100644 --- a/tensorflow/tools/benchmark/benchmark_model_test.cc +++ b/tensorflow/tools/benchmark/benchmark_model_test.cc @@ -15,13 +15,21 @@ limitations under the License. #include "tensorflow/tools/benchmark/benchmark_model.h" -#include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/cc/framework/scope.h" +#include "tensorflow/cc/ops/array_ops.h" +#include "tensorflow/cc/ops/math_ops.h" +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_testutil.h" -#include "tensorflow/core/graph/graph_def_builder.h" -#include "tensorflow/core/lib/core/status_test_util.h" -#include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/path.h" #include "tensorflow/core/platform/test.h" -#include "tensorflow/core/platform/test_benchmark.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/public/session.h" +#include "tensorflow/core/util/stat_summarizer.h" +#include "tsl/lib/core/status_test_util.h" namespace tensorflow { namespace { From 17de42e5c78c8944a1643101187efd461b07cf0d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 May 2024 23:17:37 -0700 Subject: [PATCH 155/478] Update ops-related pbtxt files. PiperOrigin-RevId: 633823815 --- tensorflow/core/ops/ops.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 4b2857a8994764..846207488b92f3 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -1,4 +1,4 @@ -go/debugonly +go/debugstr op { name: "Abort" attr { From 2206fdfecea8507224029b670fea938b44d54395 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 May 2024 23:31:50 -0700 Subject: [PATCH 156/478] Automated Code Change PiperOrigin-RevId: 633826568 --- third_party/xla/xla/tsl/concurrency/concurrent_vector_test.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/third_party/xla/xla/tsl/concurrency/concurrent_vector_test.cc b/third_party/xla/xla/tsl/concurrency/concurrent_vector_test.cc index 9d384ad8b80d08..5106909ce06146 100644 --- a/third_party/xla/xla/tsl/concurrency/concurrent_vector_test.cc +++ b/third_party/xla/xla/tsl/concurrency/concurrent_vector_test.cc @@ -18,6 +18,7 @@ limitations under the License. #include #include +#include "tsl/platform/env.h" #include "tsl/platform/test.h" #include "tsl/platform/threadpool.h" From 30dc4d79b93c1af722d07c7e73251e2af407eab4 Mon Sep 17 00:00:00 2001 From: Dateng Lin Date: Tue, 14 May 2024 23:47:32 -0700 Subject: [PATCH 157/478] Fixed the test failure. 
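The one-line fix below swaps an uninitialized raw-pointer member for a default member initializer; spelled out in isolation (stand-in types, not the real IFRT classes):

#include <cassert>

class DeviceMgr {};  // stand-in for tensorflow::DeviceMgr

class ModelContext {
 public:
  // Without "= nullptr" this member is indeterminate in any constructor that
  // forgets to assign it, and reading it is undefined behavior; the default
  // member initializer makes the null check below well-defined.
  DeviceMgr* device_mgr_ = nullptr;  // Not owned.
};

int main() {
  ModelContext ctx;
  assert(ctx.device_mgr_ == nullptr);
  return 0;
}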
PiperOrigin-RevId: 633829720 --- tensorflow/core/tfrt/ifrt/ifrt_model_context.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/tfrt/ifrt/ifrt_model_context.h b/tensorflow/core/tfrt/ifrt/ifrt_model_context.h index 20af882d634a7a..76813a3a80937a 100644 --- a/tensorflow/core/tfrt/ifrt/ifrt_model_context.h +++ b/tensorflow/core/tfrt/ifrt/ifrt_model_context.h @@ -125,7 +125,7 @@ class IfrtModelContext { IfrtServingCoreSelector* ifrt_serving_core_selector_; // May be nullptr const tsl::thread::ThreadPool& thread_pool_; - tensorflow::DeviceMgr* device_mgr_; // Not owned. + tensorflow::DeviceMgr* device_mgr_ = nullptr; // Not owned. tensorflow::XlaHelpers::ShapeRepresentationFn shape_representation_fn_ = tensorflow::IdentityShapeRepresentationFn(); From f6b72954734f8304bfb83228bd8406a3ba3394f4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 May 2024 00:13:28 -0700 Subject: [PATCH 158/478] Automated Code Change PiperOrigin-RevId: 633835586 --- tensorflow/core/platform/file_system_test.cc | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tensorflow/core/platform/file_system_test.cc b/tensorflow/core/platform/file_system_test.cc index 2ecc93f11ab507..1c4d978f13ca6c 100644 --- a/tensorflow/core/platform/file_system_test.cc +++ b/tensorflow/core/platform/file_system_test.cc @@ -38,7 +38,7 @@ class InterPlanetaryFileSystem : public NullFileSystem { string parsed_path; ParsePath(fname, &parsed_path); if (BodyExists(parsed_path)) { - return OkStatus(); + return absl::OkStatus(); } return Status(absl::StatusCode::kNotFound, "File does not exist"); } @@ -58,13 +58,13 @@ class InterPlanetaryFileSystem : public NullFileSystem { return Status(absl::StatusCode::kInvalidArgument, "Bad dirname"); } if (split_path.empty()) { - return OkStatus(); + return absl::OkStatus(); } if (split_path.size() == 1) { celestial_bodies_[""].insert(parsed_path); celestial_bodies_.insert( std::pair<string, std::set<string>>(parsed_path, {})); - return OkStatus(); + return absl::OkStatus(); } if (split_path.size() == 2) { if (!BodyExists(split_path[0])) { @@ -74,7 +74,7 @@ class InterPlanetaryFileSystem : public NullFileSystem { celestial_bodies_[split_path[0]].insert(split_path[1]); celestial_bodies_.insert( std::pair<string, std::set<string>>(parsed_path, {})); - return OkStatus(); + return absl::OkStatus(); } if (split_path.size() == 3) { const string& parent_path = this->JoinPath(split_path[0], split_path[1]); @@ -85,7 +85,7 @@ class InterPlanetaryFileSystem : public NullFileSystem { celestial_bodies_[parent_path].insert(split_path[2]); celestial_bodies_.insert( std::pair<string, std::set<string>>(parsed_path, {})); - return OkStatus(); + return absl::OkStatus(); } return Status(absl::StatusCode::kFailedPrecondition, "Failed to create"); } @@ -102,7 +102,7 @@ class InterPlanetaryFileSystem : public NullFileSystem { return Status(absl::StatusCode::kFailedPrecondition, "Not a dir"); } if (celestial_bodies_.find(parsed_path) != celestial_bodies_.end()) { - return OkStatus(); + return absl::OkStatus(); } return Status(absl::StatusCode::kFailedPrecondition, "Not a dir"); } @@ -114,7 +114,7 @@ class InterPlanetaryFileSystem : public NullFileSystem { ParsePath(dir, &parsed_path); result->insert(result->begin(), celestial_bodies_[parsed_path].begin(), celestial_bodies_[parsed_path].end()); - return OkStatus(); + return absl::OkStatus(); } private: @@ -279,7 +279,7 @@ class TestFileSystem : public NullFileSystem { // Only allow for a single root directory.
Status IsDirectory(const string& dirname, TransactionToken* token) override { if (dirname == "." || dirname.empty()) { - return OkStatus(); + return absl::OkStatus(); } return Status(absl::StatusCode::kFailedPrecondition, "Not a dir"); } @@ -290,7 +290,7 @@ class TestFileSystem : public NullFileSystem { if (dir == "." || dir.empty()) { result->push_back("test"); } - return OkStatus(); + return absl::OkStatus(); } }; From c42c1478f0e2e36861f3dda1d320d136a74e1ba6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 May 2024 00:14:36 -0700 Subject: [PATCH 159/478] Use min subgroup size when 16 is not supported PiperOrigin-RevId: 633835830 --- tensorflow/lite/delegates/gpu/common/BUILD | 1 + tensorflow/lite/delegates/gpu/common/gpu_info.cc | 12 ++++++++++++ tensorflow/lite/delegates/gpu/common/gpu_info.h | 2 ++ tensorflow/lite/delegates/gpu/common/tasks/BUILD | 1 + .../lite/delegates/gpu/common/tasks/conv_generic.cc | 10 +++++++--- 5 files changed, 23 insertions(+), 3 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/common/BUILD b/tensorflow/lite/delegates/gpu/common/BUILD index 0a55ad05a76968..195124f269c7c8 100644 --- a/tensorflow/lite/delegates/gpu/common/BUILD +++ b/tensorflow/lite/delegates/gpu/common/BUILD @@ -44,6 +44,7 @@ cc_library( deps = [ ":data_type", "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/status", "@com_google_absl//absl/strings", ], ) diff --git a/tensorflow/lite/delegates/gpu/common/gpu_info.cc b/tensorflow/lite/delegates/gpu/common/gpu_info.cc index 944202cb3d8561..2627adda13c6bd 100644 --- a/tensorflow/lite/delegates/gpu/common/gpu_info.cc +++ b/tensorflow/lite/delegates/gpu/common/gpu_info.cc @@ -21,6 +21,7 @@ limitations under the License. #include #include +#include "absl/status/status.h" #include "absl/strings/ascii.h" namespace tflite { @@ -981,6 +982,17 @@ bool GpuInfo::SupportsSubGroupWithSize(int sub_group_size) const { return false; } +absl::Status GpuInfo::GetMinSubGroupSize(int& min_sub_group_size) const { + auto begin = supported_subgroup_sizes.begin(); + auto end = supported_subgroup_sizes.end(); + auto min = std::min_element(begin, end); + if (min == end) { + return absl::InternalError("No supported subgroup sizes"); + } + min_sub_group_size = *min; + return absl::OkStatus(); +} + bool GpuInfo::SupportsFloatImage2D(DataType data_type, int channels) const { if (IsApiOpenCl()) { return opencl_info.supported_images_2d.SupportsImage2D(data_type, channels); diff --git a/tensorflow/lite/delegates/gpu/common/gpu_info.h b/tensorflow/lite/delegates/gpu/common/gpu_info.h index 15bdb065f15361..9849e2405d32e2 100644 --- a/tensorflow/lite/delegates/gpu/common/gpu_info.h +++ b/tensorflow/lite/delegates/gpu/common/gpu_info.h @@ -21,6 +21,7 @@ limitations under the License. 
#include #include "absl/container/flat_hash_set.h" +#include "absl/status/status.h" #include "absl/strings/match.h" #include "tensorflow/lite/delegates/gpu/common/data_type.h" @@ -524,6 +525,7 @@ struct GpuInfo { // returns true if device have fixed wave size equal to 32 bool IsWaveSizeEqualTo32() const; bool SupportsSubGroupWithSize(int sub_group_size) const; + absl::Status GetMinSubGroupSize(int& min_sub_group_size) const; bool SupportsFloatImage2D(DataType data_type, int channels) const; bool SupportsExtension(const std::string& extension) const; diff --git a/tensorflow/lite/delegates/gpu/common/tasks/BUILD b/tensorflow/lite/delegates/gpu/common/tasks/BUILD index 01ad443db54ae2..0b9662471277e4 100644 --- a/tensorflow/lite/delegates/gpu/common/tasks/BUILD +++ b/tensorflow/lite/delegates/gpu/common/tasks/BUILD @@ -143,6 +143,7 @@ cc_library( hdrs = ["conv_generic.h"], deps = [ "//tensorflow/lite/delegates/gpu/common:data_type", + "//tensorflow/lite/delegates/gpu/common:gpu_info", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:shape", "//tensorflow/lite/delegates/gpu/common:status", diff --git a/tensorflow/lite/delegates/gpu/common/tasks/conv_generic.cc b/tensorflow/lite/delegates/gpu/common/tasks/conv_generic.cc index c2a1bd8f539512..1e5cc2f14b9e82 100644 --- a/tensorflow/lite/delegates/gpu/common/tasks/conv_generic.cc +++ b/tensorflow/lite/delegates/gpu/common/tasks/conv_generic.cc @@ -1791,16 +1791,20 @@ ConvGeneric::ConvParams ConvGeneric::GuessBestParams( const int kSubGroupSize = 16; const bool supports_subgroup_size_control = gpu_info.SupportsExtension("cl_intel_required_subgroup_size"); + int min_subgroup_size; + auto min_subgroup_size_status = + gpu_info.GetMinSubGroupSize(min_subgroup_size); if (supports_subgroup_size_control && gpu_info.SupportsSubGroupWithSize(kSubGroupSize)) { conv_params.weights_upload_type = WeightsUploadType::PRIVATE_MEM_SIMD_BROADCAST; conv_params.simd_size = kSubGroupSize; - } else if (gpu_info.opencl_info.IsCLVK()) { - // It will work because of specific driver using subgroup size 16 + } else if (supports_subgroup_size_control && + min_subgroup_size_status.ok()) { conv_params.weights_upload_type = WeightsUploadType::PRIVATE_MEM_SIMD_BROADCAST; - conv_params.simd_size = 16; + conv_params.simd_size = min_subgroup_size; + work_group_size_ = int3(min_subgroup_size, 1, 1); } else { // no support of subgroup size control // only smallest subgroup size (8) can be used safely, otherwise From 32f6db8a0b0f4801445e9007d7ffc94a54b8ac14 Mon Sep 17 00:00:00 2001 From: Siqiao Wu Date: Wed, 15 May 2024 00:16:34 -0700 Subject: [PATCH 160/478] Add core selector support for TFRT+IFRT serving on tensorflow serving PiperOrigin-RevId: 633836270 --- tensorflow/core/tfrt/ifrt/ifrt_config.proto | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tensorflow/core/tfrt/ifrt/ifrt_config.proto b/tensorflow/core/tfrt/ifrt/ifrt_config.proto index 38c0f6ec0e0902..f11d61049ce666 100644 --- a/tensorflow/core/tfrt/ifrt/ifrt_config.proto +++ b/tensorflow/core/tfrt/ifrt/ifrt_config.proto @@ -10,3 +10,11 @@ message VariableDeviceShardingConfigProto { xla.OpSharding sharding = 1; repeated int32 device_ids = 2; } + +enum IfrtServingCoreSelectionPolicy { + // Default policy to select the soonest-to-finish core defined at + // http://shortn/_XQhI1ASAu0. + IFRT_SERVING_CORE_SELECTION_POLICY_DEFAULT = 0; + // Policy that round robin with local ordinal http://shortn/_7BtVe4dkp5. 
+ IFRT_SERVING_CORE_SELECTION_POLICY_LOCAL_ROUND_ROBIN = 1; +} From 7087cf81034c4d4c1a2d87f052b9aeec88b738a5 Mon Sep 17 00:00:00 2001 From: Henning Becker Date: Wed, 15 May 2024 00:24:47 -0700 Subject: [PATCH 161/478] [XLA:GPU] Remove unused sleep kernel Nothing is using the sleep kernel, so let's remove it PiperOrigin-RevId: 633838040 --- third_party/xla/xla/service/gpu/BUILD | 7 ---- .../xla/xla/service/gpu/sleep_kernel.cu.cc | 32 ------------------- .../xla/xla/service/gpu/sleep_kernel.h | 26 --------------- 3 files changed, 65 deletions(-) delete mode 100644 third_party/xla/xla/service/gpu/sleep_kernel.cu.cc delete mode 100644 third_party/xla/xla/service/gpu/sleep_kernel.h diff --git a/third_party/xla/xla/service/gpu/BUILD b/third_party/xla/xla/service/gpu/BUILD index c19905c49698c8..9206be42cc5ddc 100644 --- a/third_party/xla/xla/service/gpu/BUILD +++ b/third_party/xla/xla/service/gpu/BUILD @@ -874,13 +874,6 @@ cc_library( ], ) -cuda_library( - name = "sleep_kernel", - srcs = if_cuda_is_configured(["sleep_kernel.cu.cc"]), - hdrs = if_cuda_is_configured(["sleep_kernel.h"]), - deps = ["@local_config_cuda//cuda:cuda_headers"], -) - # TODO(b/244780257): Remove this config. bool_flag( name = "enable_xlir", diff --git a/third_party/xla/xla/service/gpu/sleep_kernel.cu.cc b/third_party/xla/xla/service/gpu/sleep_kernel.cu.cc deleted file mode 100644 index 8f37d47e677347..00000000000000 --- a/third_party/xla/xla/service/gpu/sleep_kernel.cu.cc +++ /dev/null @@ -1,32 +0,0 @@ -/* Copyright 2023 The OpenXLA Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "xla/service/gpu/sleep_kernel.h" - -namespace xla::gpu { -namespace { - -// Use busy waiting instead of __nanosleep() to make the code more portable -// (__nanosleep requires __CUDA_ARCH__ >= 700) -__global__ void sleep(int64_t num_clocks) { - int64_t start = clock64(); - while (clock64() - start < num_clocks) continue; -} - -} // namespace - -void* GetSleepKernel() { return reinterpret_cast<void*>(&sleep); } - -} // namespace xla::gpu diff --git a/third_party/xla/xla/service/gpu/sleep_kernel.h b/third_party/xla/xla/service/gpu/sleep_kernel.h deleted file mode 100644 index 3e040b10860e65..00000000000000 --- a/third_party/xla/xla/service/gpu/sleep_kernel.h +++ /dev/null @@ -1,26 +0,0 @@ -/* Copyright 2023 The OpenXLA Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License.
-==============================================================================*/ - -#ifndef XLA_SERVICE_GPU_SLEEP_KERNEL_H_ -#define XLA_SERVICE_GPU_SLEEP_KERNEL_H_ - -namespace xla::gpu { - -// Returns a pointer to CUDA kernel that does sleep operation on device. -void* GetSleepKernel(); - -} // namespace xla::gpu - -#endif // XLA_SERVICE_GPU_SLEEP_KERNEL_H_ From a8a6dd4b922ee56b136b014a27ab0694b427eef0 Mon Sep 17 00:00:00 2001 From: lingzhi98 <103185827+lingzhi98@users.noreply.github.com> Date: Wed, 15 May 2024 01:13:45 -0700 Subject: [PATCH 162/478] PR #12328: Make shared cache read/write logic clearer for the transpose MLIR emitter Imported from GitHub PR https://github.com/openxla/xla/pull/12328 The current transpose MLIR emitter allocates the shared cache with shape 32x1x32 for transpose 2-1-0, but the shared-cache read indices are {0, y, x}, as [this line](https://github.com/openxla/xla/blob/main/xla/service/gpu/fusions/transpose_mlir.cc#L190) shows, which is not compatible with a 32x1x32 shape. What is strange is that transpose 2-1-0 still runs successfully with the transpose MLIR emitter. The reason is that the lower-tensors pass uses a [linear index](https://github.com/openxla/xla/blob/main/xla/service/gpu/fusions/mlir/lower_tensors.cc#L148) to access the shared cache, which happens to produce the right result. For example, the strides of 32x1x32 are {32, 32, 1}, and the linear index of {0, y, x} is 0 * 32 + y * 32 + x. I am not sure whether this is expected or just a mistake. If the reviewers think this PR is not needed, feel free to close it. Copybara import of the project: -- bfb21798ee518dc11293a5683669add619a38e53 by Zhou, Lingzhi : make shared cache read/write logic clearer for transpose mlir emitter -- 0c9033334835bc8a14310e5ee059489cea7b5309 by Zhou, Lingzhi : refactor -- 5554110835fc18207fb466587c1aeb20c3a542fe by Zhou, Lingzhi : pad shared cache -- 8c17818baa1e2477952df15e412a6463f73106ab by Zhou, Lingzhi : include missing file Merging this change closes #12328 PiperOrigin-RevId: 633848774 --- .../xla/service/gpu/fusions/transpose_mlir.cc | 32 +++++++++---- .../gpu/fusions/transpose_mlir_test.cc | 46 ++++++++++++++++++- 2 files changed, 66 insertions(+), 12 deletions(-) diff --git a/third_party/xla/xla/service/gpu/fusions/transpose_mlir.cc b/third_party/xla/xla/service/gpu/fusions/transpose_mlir.cc index 2f602a75b185f4..79111485562508 100644 --- a/third_party/xla/xla/service/gpu/fusions/transpose_mlir.cc +++ b/third_party/xla/xla/service/gpu/fusions/transpose_mlir.cc @@ -180,13 +180,20 @@ IndexingMap GetSharedMemoryWriteIndexingMap( AffineExpr th_x = mlir::getAffineDimExpr(0, mlir_context); SmallVector<AffineExpr> tile_sizes(3); mlir::bindSymbolsList(mlir_context, llvm::MutableArrayRef(tile_sizes)); + SmallVector<AffineExpr> shared_memory_indices = { + th_x.floorDiv(32) + 4 * tile_sizes[loop_dim], th_x % 32}; + for (auto [index, range_val] : + llvm::enumerate(thread_id_indexing.GetRangeVars())) { + if (range_val.range.NumElements() == 1) { + shared_memory_indices.insert(shared_memory_indices.begin() + index, c0); + break; + } + } IndexingMap shmem_write_indexing{ - AffineMap::get( - thread_id_indexing.GetDimensionCount(), - thread_id_indexing.GetSymbolCount(), - {c0, th_x.floorDiv(32) + 4 * tile_sizes[loop_dim], th_x % 32}, - mlir_context), + AffineMap::get(thread_id_indexing.GetDimensionCount(), + thread_id_indexing.GetSymbolCount(), shared_memory_indices, + mlir_context), thread_id_indexing.GetDimVars(), thread_id_indexing.GetRangeVars(), thread_id_indexing.GetRTVars(), thread_id_indexing.GetConstraints()}; return shmem_write_indexing; @@ -195,10 +202,12 @@ IndexingMap GetSharedMemoryWriteIndexingMap( // Returns an indexing map with block_x, block_y, block_z set to 0 and swapped // 2nd and 3rd results. IndexingMap GetSharedMemoryReadIndexingMap( - const IndexingMap& thread_id_indexing, int loop_dim) { - IndexingMap write_indexing = - GetSharedMemoryWriteIndexingMap(thread_id_indexing, loop_dim); - return IndexingMap{write_indexing.GetAffineMap().getSubMap({0, 2, 1}), + const IndexingMap& thread_id_indexing, Vector3 permutation) { + IndexingMap write_indexing = GetSharedMemoryWriteIndexingMap( + thread_id_indexing, /*loop_dim=*/permutation[2]); + llvm::SmallVector<unsigned> positions; + absl::c_copy(permutation, std::back_inserter(positions)); + return IndexingMap{write_indexing.GetAffineMap().getSubMap(positions), write_indexing.GetDimVars(), write_indexing.GetRangeVars(), write_indexing.GetRTVars(), write_indexing.GetConstraints()}; @@ -212,6 +221,9 @@ MlirTransposeFusion::WriteResult MlirTransposeFusion::EmitWriteToShMemMlir( ValueRange output_args) const { std::vector<int64_t> shmem_tensor_size(tiling_.GetBlockTileSize().begin(), tiling_.GetBlockTileSize().end()); + // Avoid bank conflict. + ++shmem_tensor_size.back(); + MLIRContext* ctx = builder.getContext(); WriteResult write_result; @@ -305,7 +317,7 @@ void MlirTransposeFusion::EmitReadFromShMemMlir( auto* mlir_context = builder.getContext(); auto output_indexing = *ComputeThreadIdToOutputIndexing(0, mlir_context); auto shmem_output_indexing = - GetSharedMemoryReadIndexingMap(output_indexing, permutation_[2]); + GetSharedMemoryReadIndexingMap(output_indexing, permutation_); auto result_tensors = EmitThreadLoopNest( builder, written.updated_outputs, output_indexing, [&](ValueRange output_tensors, ValueRange dim_values, diff --git a/third_party/xla/xla/service/gpu/fusions/transpose_mlir_test.cc b/third_party/xla/xla/service/gpu/fusions/transpose_mlir_test.cc index 086ebf8f2ad911..b2343d916537cf 100644 --- a/third_party/xla/xla/service/gpu/fusions/transpose_mlir_test.cc +++ b/third_party/xla/xla/service/gpu/fusions/transpose_mlir_test.cc @@ -171,7 +171,49 @@ TEST_F(MlirTransposeFusionTest, FusedTranspose021) { // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index // CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index - // CHECK: %[[SHMEM:.*]] = xla_gpu.allocate_shared : tensor<1x32x32xf32> + // CHECK: %[[SHMEM:.*]] = xla_gpu.allocate_shared : tensor<1x32x33xf32> // CHECK: %[[SHMEM_WITH_VALS:.*]] = scf.for // CHECK-SAME: %[[C0]] to %[[C8]] step %[[C1]] // CHECK-SAME: iter_args(%[[SHMEM_:.*]] = %[[SHMEM]]) // CHECK: %[[EXP:.*]] = xla_gpu.pure_call @fused_computation_exp // CHECK: tensor.insert %[[EXP]] into %[[SHMEM_]] // CHECK: %[[SYNC:.*]] = xla_gpu.sync_threads %[[SHMEM_WITH_VALS]] // CHECK: scf.for // CHECK-SAME: %[[C0]] to %[[C8]] step %[[C1]] // CHECK-SAME: iter_args(%[[OUT_:.*]] = %[[OUT]]) // CHECK: %[[ABS:.*]] = xla_gpu.pure_call @fused_computation__epilogue__ // CHECK: tensor.insert %[[ABS]] into %[[OUT_]] )")); EXPECT_TRUE(RunAndCompareNoHloPasses(kHloString, ErrorSpec{1e-3})); } +TEST_F(MlirTransposeFusionTest, FusedTranspose210) { + auto kHloString = R"( + HloModule Transpose + + %fused_computation { + %p0 = f32[20,160,170] parameter(0) + %exp = f32[20,160,170] exponential(%p0) + %transpose = f32[170,160,20] transpose(%exp), dimensions={2,1,0} + ROOT %abs = f32[170,160,20] abs(%transpose) + } + ENTRY main { + %param = f32[20,160,170] parameter(0) + ROOT %fusion = f32[170,160,20] fusion(%param), kind=kInput,
calls=%fused_computation } )"; TF_ASSERT_OK(EmitAndCheckIR(kHloString, R"( // CHECK-LABEL: func.func @fused_computation( // CHECK-SAME: }, %[[OUT:.*]]: tensor<170x160x20xf32> // // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index // CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index // CHECK: %[[SHMEM:.*]] = xla_gpu.allocate_shared : tensor<32x1x33xf32> // CHECK: %[[SHMEM_WITH_VALS:.*]] = scf.for // CHECK-SAME: %[[C0]] to %[[C8]] step %[[C1]] // CHECK-SAME: iter_args(%[[SHMEM_:.*]] = %[[SHMEM]]) // CHECK: %[[EXP:.*]] = xla_gpu.pure_call @fused_computation_exp // CHECK: tensor.insert %[[EXP]] into %[[SHMEM_]] // CHECK: %[[SYNC:.*]] = xla_gpu.sync_threads %[[SHMEM_WITH_VALS]] // CHECK: scf.for // CHECK-SAME: %[[C0]] to %[[C8]] step %[[C1]] // CHECK-SAME: iter_args(%[[OUT_:.*]] = %[[OUT]]) // CHECK: %[[ABS:.*]] = xla_gpu.pure_call @fused_computation__epilogue__ // CHECK: tensor.insert %[[ABS]] into %[[OUT_]] )")); EXPECT_TRUE(RunAndCompareNoHloPasses(kHloString, ErrorSpec{1e-3})); } @@ -212,7 +254,7 @@ TEST_F(MlirTransposeFusionTest, Transpose021_Parameter) { // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index // CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index - // CHECK: %[[SHMEM:.*]] = xla_gpu.allocate_shared : tensor<1x32x32xf32> + // CHECK: %[[SHMEM:.*]] = xla_gpu.allocate_shared : tensor<1x32x33xf32> // CHECK: %[[SHMEM_WITH_VALS:.*]] = scf.for // CHECK-SAME: %[[C0]] to %[[C8]] step %[[C1]] // CHECK-SAME: iter_args(%[[SHMEM_:.*]] = %[[SHMEM]]) From 42aeac11da3458b463937dc382882ac459d8edea Mon Sep 17 00:00:00 2001 From: Harsha H S Date: Wed, 15 May 2024 01:26:40 -0700 Subject: [PATCH 163/478] PR #12463: [ROCm] Fix build break in `xla/service/gpu/runtime/convolution_thunk.cc` Imported from GitHub PR https://github.com/openxla/xla/pull/12463 Fix build break introduced in db8f9c0cf2bf08b171aec2cbe3da2fcb074a6207 Copybara import of the project: -- 42d5839ffb47362a7f6789ba98ef50b31189dccc by Harsha HS : [ROCm] Fix build break in `xla/service/gpu/runtime/convolution_thunk.cc` -- fd95e342a4e8de6c8348df3576510584a0020150 by Harsha HS : Guard header using TENSORFLOW_USE_ROCM Merging this change closes #12463 PiperOrigin-RevId: 633851634 --- third_party/xla/xla/service/gpu/runtime/BUILD | 2 +- third_party/xla/xla/service/gpu/runtime/convolution_thunk.cc | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/third_party/xla/xla/service/gpu/runtime/BUILD b/third_party/xla/xla/service/gpu/runtime/BUILD index e53035e558ee9d..4327ce5583e62b 100644 --- a/third_party/xla/xla/service/gpu/runtime/BUILD +++ b/third_party/xla/xla/service/gpu/runtime/BUILD @@ -527,7 +527,7 @@ cc_library( "@com_google_absl//absl/synchronization", "@com_google_absl//absl/types:span", "@local_tsl//tsl/platform:errors", - ], + ] + if_rocm_is_configured(["//xla/service/gpu:stream_executor_util"]), ) cc_library( diff --git a/third_party/xla/xla/service/gpu/runtime/convolution_thunk.cc b/third_party/xla/xla/service/gpu/runtime/convolution_thunk.cc index b7bba0fab3b418..32e81855e7ffb8 100644 --- a/third_party/xla/xla/service/gpu/runtime/convolution_thunk.cc +++ b/third_party/xla/xla/service/gpu/runtime/convolution_thunk.cc @@ -27,6 +27,9 @@ limitations under the License. #include "absl/synchronization/mutex.h" #include "absl/types/span.h" #include "xla/service/buffer_assignment.h" +#if TENSORFLOW_USE_ROCM +#include "xla/service/gpu/stream_executor_util.h" +#endif // TENSORFLOW_USE_ROCM #include "xla/service/gpu/gpu_conv_runner.h" #include "xla/service/gpu/runtime/thunk.h" #include "xla/stream_executor/device_memory.h" From e9215bfcc11aa5fe4115b4d4379894f0597720e6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 May 2024 01:27:19 -0700 Subject: [PATCH 164/478] Update ops-related pbtxt files.
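Stepping back to the transpose shared-cache patch above: both the stride argument in its description and the new tensor<1x32x33xf32> padding can be checked with plain index arithmetic. A hedged sketch (pure math, not the emitter code):

#include <cassert>

// Row-major linear index for a 3-D shape {d0, d1, d2}, whose strides are
// {d1 * d2, d2, 1}.
int LinearIndex(int i0, int i1, int i2, int d1, int d2) {
  return (i0 * d1 + i1) * d2 + i2;
}

int main() {
  // Shape 32x1x32 has strides {32, 32, 1}, so the "wrong" read index
  // {0, y, x} linearizes to the same offset as the intended {y, 0, x}:
  // 0 * 32 + y * 32 + x. That is why the old emitter happened to work.
  assert(LinearIndex(0, 5, 7, /*d1=*/1, /*d2=*/32) ==
         LinearIndex(5, 0, 7, /*d1=*/1, /*d2=*/32));  // both equal 167
  // Padding the minor dimension from 32 to 33 floats shifts each successive
  // row by one 4-byte bank (33 mod 32 == 1), which avoids shared-memory bank
  // conflicts on column accesses.
  return 0;
}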
PiperOrigin-RevId: 633851744 --- tensorflow/core/ops/ops.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 846207488b92f3..a15dada136af38 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -1,4 +1,4 @@ -go/debugstr +go/nodeserialize op { name: "Abort" attr { From f55a2ac80498820b4e7ae15c81e21f847b133a36 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 May 2024 01:53:14 -0700 Subject: [PATCH 165/478] Automated Code Change PiperOrigin-RevId: 633857603 --- tensorflow/compiler/mlir/tensorflow/BUILD | 2 ++ .../tensorflow/analysis/resource_alias_analysis.cc | 13 ++++++++++--- .../tensorflow/analysis/resource_alias_analysis.h | 7 +++++++ .../mlir/tensorflow/analysis/resource_dataflow.h | 2 ++ .../analysis/resource_value_typed_analyzer.cc | 9 +++++++++ .../analysis/resource_value_typed_analyzer.h | 5 +++++ .../tensorflow/analysis/side_effect_analysis.cc | 11 +++++++++-- .../mlir/tensorflow/analysis/side_effect_analysis.h | 6 ++++++ 8 files changed, 50 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/BUILD b/tensorflow/compiler/mlir/tensorflow/BUILD index 85f8644fc2d88f..b138c2d3efd598 100644 --- a/tensorflow/compiler/mlir/tensorflow/BUILD +++ b/tensorflow/compiler/mlir/tensorflow/BUILD @@ -676,10 +676,12 @@ cc_library( ":tensorflow", ":tensorflow_op_interfaces", ":tensorflow_side_effects", + ":tensorflow_traits", ":tensorflow_types", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/container:node_hash_map", + "@com_google_absl//absl/log", "@llvm-project//llvm:Support", "@llvm-project//mlir:Analysis", "@llvm-project//mlir:FuncDialect", diff --git a/tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.cc b/tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.cc index cab89bb10b5fb9..267bc48d17e06d 100644 --- a/tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.cc +++ b/tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.cc @@ -20,29 +20,36 @@ limitations under the License. 
#include #include +#include "absl/log/log.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Support/Casting.h" #include "mlir/Analysis/CallGraph.h" // from @llvm-project #include "mlir/Dialect/Func/IR/FuncOps.h" // from @llvm-project -#include "mlir/IR/Attributes.h" // from @llvm-project #include "mlir/IR/Block.h" // from @llvm-project +#include "mlir/IR/BuiltinAttributes.h" // from @llvm-project #include "mlir/IR/BuiltinOps.h" // from @llvm-project -#include "mlir/IR/BuiltinTypes.h" // from @llvm-project +#include "mlir/IR/OpDefinition.h" // from @llvm-project #include "mlir/IR/Operation.h" // from @llvm-project #include "mlir/IR/Value.h" // from @llvm-project +#include "mlir/IR/ValueRange.h" // from @llvm-project #include "mlir/IR/Visitors.h" // from @llvm-project #include "mlir/Interfaces/CallInterfaces.h" // from @llvm-project +#include "mlir/Interfaces/SideEffectInterfaces.h" // from @llvm-project #include "mlir/Support/LLVM.h" // from @llvm-project -#include "mlir/Support/LogicalResult.h" // from @llvm-project +#include "mlir/Support/TypeID.h" // from @llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_device.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_op_interfaces.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_traits.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" namespace mlir { namespace TF { diff --git a/tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.h b/tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.h index 7afec29bc5df75..c49852c1864763 100644 --- a/tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.h +++ b/tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.h @@ -21,14 +21,21 @@ limitations under the License. #include #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" // from @llvm-project +#include "mlir/IR/BuiltinOps.h" // from @llvm-project #include "mlir/IR/Operation.h" // from @llvm-project #include "mlir/IR/Region.h" // from @llvm-project +#include "mlir/IR/SymbolTable.h" // from @llvm-project #include "mlir/IR/TypeUtilities.h" // from @llvm-project +#include "mlir/IR/Value.h" // from @llvm-project +#include "mlir/Support/TypeID.h" // from @llvm-project #include "tensorflow/compiler/mlir/tensorflow/analysis/per_function_aggregate_analysis.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" diff --git a/tensorflow/compiler/mlir/tensorflow/analysis/resource_dataflow.h b/tensorflow/compiler/mlir/tensorflow/analysis/resource_dataflow.h index 0cf3611af1d20c..1e68ac41d25b54 100644 --- a/tensorflow/compiler/mlir/tensorflow/analysis/resource_dataflow.h +++ b/tensorflow/compiler/mlir/tensorflow/analysis/resource_dataflow.h @@ -25,9 +25,11 @@ limitations under the License. 
#include "llvm/Support/Debug.h" #include "mlir/Analysis/DataFlow/DeadCodeAnalysis.h" // from @llvm-project #include "mlir/Analysis/DataFlow/SparseAnalysis.h" // from @llvm-project +#include "mlir/Analysis/DataFlowFramework.h" // from @llvm-project #include "mlir/Dialect/Func/IR/FuncOps.h" // from @llvm-project #include "mlir/IR/Builders.h" // from @llvm-project #include "mlir/IR/BuiltinOps.h" // from @llvm-project +#include "mlir/IR/MLIRContext.h" // from @llvm-project #include "mlir/IR/SymbolTable.h" // from @llvm-project #include "mlir/IR/Value.h" // from @llvm-project #include "mlir/Pass/Pass.h" // from @llvm-project diff --git a/tensorflow/compiler/mlir/tensorflow/analysis/resource_value_typed_analyzer.cc b/tensorflow/compiler/mlir/tensorflow/analysis/resource_value_typed_analyzer.cc index abace5111184ff..372446641382ac 100644 --- a/tensorflow/compiler/mlir/tensorflow/analysis/resource_value_typed_analyzer.cc +++ b/tensorflow/compiler/mlir/tensorflow/analysis/resource_value_typed_analyzer.cc @@ -16,9 +16,18 @@ limitations under the License. #include +#include "llvm/ADT/STLExtras.h" #include "mlir/Dialect/Func/IR/FuncOps.h" // from @llvm-project #include "mlir/IR/BuiltinAttributes.h" // from @llvm-project +#include "mlir/IR/BuiltinOps.h" // from @llvm-project +#include "mlir/IR/BuiltinTypes.h" // from @llvm-project +#include "mlir/IR/Region.h" // from @llvm-project +#include "mlir/IR/SymbolTable.h" // from @llvm-project +#include "mlir/IR/Types.h" // from @llvm-project +#include "mlir/IR/Value.h" // from @llvm-project +#include "mlir/Interfaces/CallInterfaces.h" // from @llvm-project #include "mlir/Support/LLVM.h" // from @llvm-project +#include "mlir/Support/LogicalResult.h" // from @llvm-project #include "mlir/Transforms/RegionUtils.h" // from @llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.h" diff --git a/tensorflow/compiler/mlir/tensorflow/analysis/resource_value_typed_analyzer.h b/tensorflow/compiler/mlir/tensorflow/analysis/resource_value_typed_analyzer.h index 9817b290c4cbdb..738d8c1df3d395 100644 --- a/tensorflow/compiler/mlir/tensorflow/analysis/resource_value_typed_analyzer.h +++ b/tensorflow/compiler/mlir/tensorflow/analysis/resource_value_typed_analyzer.h @@ -22,6 +22,11 @@ limitations under the License. #include "llvm/ADT/StringRef.h" #include "mlir/Dialect/Func/IR/FuncOps.h" // from @llvm-project #include "mlir/IR/BuiltinOps.h" // from @llvm-project +#include "mlir/IR/Operation.h" // from @llvm-project +#include "mlir/IR/Region.h" // from @llvm-project +#include "mlir/IR/Value.h" // from @llvm-project +#include "mlir/Support/LLVM.h" // from @llvm-project +#include "mlir/Support/LogicalResult.h" // from @llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" namespace mlir { diff --git a/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.cc b/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.cc index df0138a20a0c74..179b3979348161 100644 --- a/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.cc +++ b/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.cc @@ -26,25 +26,32 @@ limitations under the License. 
#include "absl/container/flat_hash_map.h" #include "absl/container/flat_hash_set.h" #include "absl/container/node_hash_map.h" +#include "absl/log/log.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/STLFunctionalExtras.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/Support/Casting.h" #include "mlir/Dialect/Func/IR/FuncOps.h" // from @llvm-project -#include "mlir/IR/Attributes.h" // from @llvm-project #include "mlir/IR/Block.h" // from @llvm-project #include "mlir/IR/Builders.h" // from @llvm-project #include "mlir/IR/BuiltinAttributes.h" // from @llvm-project #include "mlir/IR/BuiltinOps.h" // from @llvm-project #include "mlir/IR/Operation.h" // from @llvm-project +#include "mlir/IR/SymbolTable.h" // from @llvm-project +#include "mlir/IR/TypeUtilities.h" // from @llvm-project #include "mlir/IR/Value.h" // from @llvm-project #include "mlir/IR/Visitors.h" // from @llvm-project +#include "mlir/Interfaces/CallInterfaces.h" // from @llvm-project #include "mlir/Interfaces/SideEffectInterfaces.h" // from @llvm-project #include "mlir/Support/DebugStringHelper.h" // from @llvm-project #include "mlir/Support/LLVM.h" // from @llvm-project +#include "mlir/Support/TypeID.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_device.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_dialect.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_op_interfaces.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" diff --git a/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.h b/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.h index 97fcd30d36d02f..feb90de18857b2 100644 --- a/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.h +++ b/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.h @@ -23,12 +23,18 @@ limitations under the License. #include "absl/container/flat_hash_map.h" #include "absl/container/flat_hash_set.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" // from @llvm-project +#include "mlir/IR/BuiltinOps.h" // from @llvm-project #include "mlir/IR/Operation.h" // from @llvm-project #include "mlir/IR/Region.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tensorflow/analysis/per_function_aggregate_analysis.h" #include "tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.h" namespace mlir { From 8f18c6a99aea641ddd355b3ca8bc9f61f32cee4a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 May 2024 02:03:30 -0700 Subject: [PATCH 166/478] compat: Update forward compatibility horizon to 2024-05-15 PiperOrigin-RevId: 633859960 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index f372d9e16c9363..8416badea05697 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -29,7 +29,7 @@ # This value changes every day with an automatic CL. 
It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2024, 5, 14) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2024, 5, 15) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From 55afb206dafb17a1e4f31b4fd0030c1f5ca625b8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 May 2024 02:03:37 -0700 Subject: [PATCH 167/478] Update GraphDef version to 1863. PiperOrigin-RevId: 633859985 --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index e6d5b27b16cb2c..a60f2dd95fe679 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 1862 // Updated: 2024/5/14 +#define TF_GRAPH_DEF_VERSION 1863 // Updated: 2024/5/15 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From a77a2470aed0f56b11db21f15153fcadfaee3f15 Mon Sep 17 00:00:00 2001 From: Greg Olechwierowicz Date: Wed, 15 May 2024 02:04:45 -0700 Subject: [PATCH 168/478] [XLA:GPU] Migrate while loop extraction to utility function. PiperOrigin-RevId: 633860340 --- third_party/xla/xla/service/gpu/BUILD | 1 + .../gpu/double_buffer_loop_unrolling_test.cc | 153 +++++++----------- 2 files changed, 58 insertions(+), 96 deletions(-) diff --git a/third_party/xla/xla/service/gpu/BUILD b/third_party/xla/xla/service/gpu/BUILD index 9206be42cc5ddc..3883a34d7c4b6e 100644 --- a/third_party/xla/xla/service/gpu/BUILD +++ b/third_party/xla/xla/service/gpu/BUILD @@ -5838,6 +5838,7 @@ xla_cc_test( "//xla:xla_data_proto_cc", "//xla:xla_proto_cc", "//xla/hlo/ir:hlo", + "//xla/hlo/utils:hlo_query", "//xla/service:tuple_simplifier", "//xla/tests:hlo_test_base", "//xla/tests:xla_internal_test_main", diff --git a/third_party/xla/xla/service/gpu/double_buffer_loop_unrolling_test.cc b/third_party/xla/xla/service/gpu/double_buffer_loop_unrolling_test.cc index b7ff9a24de7e2e..2d2f33f160db87 100644 --- a/third_party/xla/xla/service/gpu/double_buffer_loop_unrolling_test.cc +++ b/third_party/xla/xla/service/gpu/double_buffer_loop_unrolling_test.cc @@ -24,6 +24,7 @@ limitations under the License. 
#include "xla/hlo/ir/hlo_instruction.h" #include "xla/hlo/ir/hlo_module.h" #include "xla/hlo/ir/hlo_opcode.h" +#include "xla/hlo/utils/hlo_query.h" #include "xla/service/tuple_simplifier.h" #include "xla/test.h" #include "xla/tests/hlo_test_base.h" @@ -110,12 +111,8 @@ ENTRY main { EXPECT_TRUE(changed); TF_ASSERT_OK_AND_ASSIGN(changed, tuple_simp.Run(module.get())); EXPECT_TRUE(changed); - HloInstruction* while_instruction; - for (auto instr : module->entry_computation()->instructions()) { - if (instr->opcode() == HloOpcode::kWhile) { - while_instruction = instr; - } - } + HloInstruction* while_instruction = hlo_query::GetFirstInstructionWithOpcode( + *module->entry_computation(), HloOpcode::kWhile); TF_ASSERT_OK_AND_ASSIGN( WhileLoopBackendConfig config, while_instruction->backend_config()); @@ -243,12 +240,8 @@ ENTRY main { TF_ASSERT_OK_AND_ASSIGN(changed, tuple_simp.Run(module.get())); EXPECT_TRUE(changed); - HloInstruction* while_instruction; - for (auto instr : module->entry_computation()->instructions()) { - if (instr->opcode() == HloOpcode::kWhile) { - while_instruction = instr; - } - } + HloInstruction* while_instruction = hlo_query::GetFirstInstructionWithOpcode( + *module->entry_computation(), HloOpcode::kWhile); TF_ASSERT_OK_AND_ASSIGN( WhileLoopBackendConfig config, while_instruction->backend_config()); @@ -314,12 +307,8 @@ ENTRY main { // We expect that for the while loop, no further copy needs to be added to the // module. - HloInstruction* while_instruction; - for (auto instr : module->entry_computation()->instructions()) { - if (instr->opcode() == HloOpcode::kWhile) { - while_instruction = instr; - } - } + HloInstruction* while_instruction = hlo_query::GetFirstInstructionWithOpcode( + *module->entry_computation(), HloOpcode::kWhile); TF_ASSERT_OK_AND_ASSIGN( WhileLoopBackendConfig config, while_instruction->backend_config()); @@ -377,12 +366,8 @@ ENTRY main { EXPECT_THAT(double_buffer.Run(module.get()), IsOkAndHolds(true)); EXPECT_THAT(tuple_simp.Run(module.get()), IsOkAndHolds(true)); - HloInstruction* while_instruction; - for (auto instr : module->entry_computation()->instructions()) { - if (instr->opcode() == HloOpcode::kWhile) { - while_instruction = instr; - } - } + HloInstruction* while_instruction = hlo_query::GetFirstInstructionWithOpcode( + *module->entry_computation(), HloOpcode::kWhile); TF_ASSERT_OK_AND_ASSIGN( WhileLoopBackendConfig config, while_instruction->backend_config()); @@ -449,12 +434,8 @@ ENTRY main { EXPECT_THAT(double_buffer.Run(module.get()), IsOkAndHolds(true)); EXPECT_THAT(tuple_simp.Run(module.get()), IsOkAndHolds(true)); - HloInstruction* while_instruction; - for (auto instr : module->entry_computation()->instructions()) { - if (instr->opcode() == HloOpcode::kWhile) { - while_instruction = instr; - } - } + HloInstruction* while_instruction = hlo_query::GetFirstInstructionWithOpcode( + *module->entry_computation(), HloOpcode::kWhile); TF_ASSERT_OK_AND_ASSIGN( WhileLoopBackendConfig config, while_instruction->backend_config()); @@ -468,14 +449,14 @@ ENTRY main { HloOpcode::kAllReduceStart), 2); absl::flat_hash_set channel_ids; - for (HloInstruction* ar : while_instruction->while_body()->instructions()) { - if (ar->opcode() == HloOpcode::kAllReduceStart) { - // We expect that after unrolling, all-reduces should not have any control - // deps. 
- EXPECT_EQ(ar->control_predecessors().size(), 0); - channel_ids.insert(*(ar->channel_id())); - } - } + hlo_query::ForEachInstructionWithOpcode( + *while_instruction->while_body(), HloOpcode::kAllReduceStart, + [&channel_ids](HloInstruction* ar) { + // We expect that after unrolling, all-reduces should not have any + // control deps. + EXPECT_EQ(ar->control_predecessors().size(), 0); + channel_ids.insert(*(ar->channel_id())); + }); // we expect that all 2 all-reduces will have different channel ids. EXPECT_EQ(channel_ids.size(), 2); } @@ -523,12 +504,8 @@ ENTRY main { EXPECT_THAT(double_buffer.Run(module.get()), IsOkAndHolds(true)); EXPECT_THAT(tuple_simp.Run(module.get()), IsOkAndHolds(true)); - HloInstruction* while_instruction; - for (auto instr : module->entry_computation()->instructions()) { - if (instr->opcode() == HloOpcode::kWhile) { - while_instruction = instr; - } - } + HloInstruction* while_instruction = hlo_query::GetFirstInstructionWithOpcode( + *module->entry_computation(), HloOpcode::kWhile); TF_ASSERT_OK_AND_ASSIGN( WhileLoopBackendConfig config, while_instruction->backend_config<WhileLoopBackendConfig>()); @@ -540,14 +517,14 @@ ENTRY main { HloOpcode::kAllReduceStart), 10); absl::flat_hash_set<int64_t> channel_ids; - for (HloInstruction* ar : while_instruction->while_body()->instructions()) { - if (ar->opcode() == HloOpcode::kAllReduceStart) { - // We expect that after unrolling, all-reduces should not have any control - // deps. - EXPECT_EQ(ar->control_predecessors().size(), 0); - channel_ids.insert(*(ar->channel_id())); - } - } + hlo_query::ForEachInstructionWithOpcode( + *while_instruction->while_body(), HloOpcode::kAllReduceStart, + [&channel_ids](HloInstruction* ar) { + // We expect that after unrolling, all-reduces should not have any + // control deps. + EXPECT_EQ(ar->control_predecessors().size(), 0); + channel_ids.insert(*(ar->channel_id())); + }); // we expect that all 10 all-reduces will have different channel ids. EXPECT_EQ(channel_ids.size(), 10); } @@ -597,15 +574,13 @@ ENTRY main { absl::flat_hash_set<const HloComputation*> while_loops_callees; - for (const HloComputation* computation : module->computations()) { - for (const HloInstruction* instr : computation->instructions()) { - if (instr->opcode() == HloOpcode::kWhile) { + hlo_query::ForEachInstructionWithOpcode( + *module, HloOpcode::kWhile, + [&while_loops_callees](HloInstruction* instr) { EXPECT_TRUE( while_loops_callees.insert(instr->while_condition()).second); EXPECT_TRUE(while_loops_callees.insert(instr->while_body()).second); - } - } - } + }); // We expect that the nested while loop has been duplicated, along with its // associated computations. @@ -656,15 +631,13 @@ ENTRY main { absl::flat_hash_set<const HloComputation*> while_loops_callees; - for (const HloComputation* computation : module->computations()) { - for (const HloInstruction* instr : computation->instructions()) { - if (instr->opcode() == HloOpcode::kWhile) { + hlo_query::ForEachInstructionWithOpcode( + *module, HloOpcode::kWhile, + [&while_loops_callees](HloInstruction* instr) { EXPECT_TRUE( while_loops_callees.insert(instr->while_condition()).second); EXPECT_TRUE(while_loops_callees.insert(instr->while_body()).second); - } - } - } + }); // We expect that the nested while loop has been duplicated, along with its // associated computations. @@ -716,25 +689,23 @@ ENTRY main { absl::flat_hash_set<const HloComputation*> while_loops_callees; - for (const HloComputation* computation : module->computations()) { - for (const HloInstruction* instr : computation->instructions()) { - if (instr->opcode() == HloOpcode::kWhile) { + hlo_query::ForEachInstructionWithOpcode( + *module, HloOpcode::kWhile, + [&while_loops_callees](HloInstruction* instr) { EXPECT_TRUE( while_loops_callees.insert(instr->while_condition()).second); EXPECT_TRUE(while_loops_callees.insert(instr->while_body()).second); - } - } - } - - for (const HloInstruction* instr : - module->entry_computation()->instructions()) { - if (instr->opcode() == HloOpcode::kWhile) { - TF_ASSERT_OK_AND_ASSIGN(WhileLoopBackendConfig config, - instr->backend_config<WhileLoopBackendConfig>()); - int64_t exact_trip_count = config.known_trip_count().n(); - EXPECT_EQ(exact_trip_count, 1); - } - } + }); + + hlo_query::ForEachInstructionWithOpcode( + *module->entry_computation(), HloOpcode::kWhile, + [](HloInstruction* instr) { + TF_ASSERT_OK_AND_ASSIGN( + WhileLoopBackendConfig config, + instr->backend_config<WhileLoopBackendConfig>()); + int64_t exact_trip_count = config.known_trip_count().n(); + EXPECT_EQ(exact_trip_count, 1); + }); // We expect that the nested while loop has been fully duplicated 10 // times. The one outer while loop still remains so that's 11 while @@ -780,19 +751,14 @@ ENTRY main { EXPECT_THAT(double_buffer.Run(module.get()), IsOkAndHolds(true)); int64_t num_whiles = 0; - for (const HloComputation* computation : module->computations()) { - for (const HloInstruction* instr : computation->instructions()) { - if (instr->opcode() == HloOpcode::kWhile) { - // All loops in the module should be unrolled now and have trip count - // of 5. + hlo_query::ForEachInstructionWithOpcode( + *module, HloOpcode::kWhile, [&num_whiles](HloInstruction* instr) { EXPECT_EQ(instr->backend_config<WhileLoopBackendConfig>() ->known_trip_count() .n(), 5); ++num_whiles; - } - } - } + }); // We expect the number of while loops to be 4 in total after unrolling. EXPECT_EQ(num_whiles, 4); } @@ -835,19 +801,14 @@ ENTRY main { EXPECT_THAT(double_buffer.Run(module.get()), IsOkAndHolds(true)); int64_t num_whiles = 0; - // TODO(olechwierowicz): We should have an abstraction in HloModule which lets - // us to iterate over instrucitons of certain kind.
- for (const HloComputation* computation : module->computations()) { - for (const HloInstruction* instr : computation->instructions()) { - if (instr->opcode() == HloOpcode::kWhile) { + hlo_query::ForEachInstructionWithOpcode( + *module, HloOpcode::kWhile, [&num_whiles](HloInstruction* instr) { EXPECT_EQ(instr->backend_config() ->known_trip_count() .n(), 1); ++num_whiles; - } - } - } + }); EXPECT_EQ(num_whiles, 12); } From a77e969118f85ae5a7ae738e0f423d01c85291f3 Mon Sep 17 00:00:00 2001 From: Henning Becker Date: Wed, 15 May 2024 03:11:15 -0700 Subject: [PATCH 169/478] Fix ROCm build after TypedKernel change PiperOrigin-RevId: 633874819 --- third_party/xla/xla/stream_executor/gpu/BUILD | 1 + .../gpu/redzone_allocator_kernel_rocm.cu.cc | 11 ++++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/third_party/xla/xla/stream_executor/gpu/BUILD b/third_party/xla/xla/stream_executor/gpu/BUILD index b2bd00c972d26b..86e594ce824f3a 100644 --- a/third_party/xla/xla/stream_executor/gpu/BUILD +++ b/third_party/xla/xla/stream_executor/gpu/BUILD @@ -515,6 +515,7 @@ gpu_kernel_library( "//xla/stream_executor", "//xla/stream_executor:device_memory", "//xla/stream_executor:stream_executor_headers", + "//xla/stream_executor:typed_kernel_factory", "@com_google_absl//absl/status:statusor", "@local_config_rocm//rocm:rocm_headers", "@local_tsl//tsl/platform:statusor", diff --git a/third_party/xla/xla/stream_executor/gpu/redzone_allocator_kernel_rocm.cu.cc b/third_party/xla/xla/stream_executor/gpu/redzone_allocator_kernel_rocm.cu.cc index e7958b4ba4e631..c1e7200cddbff7 100644 --- a/third_party/xla/xla/stream_executor/gpu/redzone_allocator_kernel_rocm.cu.cc +++ b/third_party/xla/xla/stream_executor/gpu/redzone_allocator_kernel_rocm.cu.cc @@ -21,6 +21,7 @@ limitations under the License. #include "xla/stream_executor/kernel.h" #include "xla/stream_executor/stream_executor.h" #include "xla/stream_executor/stream_executor_pimpl.h" +#include "xla/stream_executor/typed_kernel_factory.h" #include "tsl/platform/statusor.h" namespace { @@ -38,11 +39,11 @@ namespace stream_executor { absl::StatusOr GetComparisonKernel( StreamExecutor* executor, GpuAsmOpts /*gpu_asm_opts*/) { - static auto kernel = - TypedKernel, uint8, uint64_t, - DeviceMemory>::Create(executor, "redzone_checker", - reinterpret_cast( - redzone_checker_kernel)); + static auto kernel = TypedKernelFactory< + DeviceMemory, uint8, uint64_t, + DeviceMemory>::Create(executor, "redzone_checker", + reinterpret_cast( + redzone_checker_kernel)); if (!kernel.ok()) return kernel.status(); return &kernel.value(); From 12ec42e0c07fa97d009dcd050b4a009d95f51640 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 May 2024 03:19:26 -0700 Subject: [PATCH 170/478] Update ops-related pbtxt files. 
PiperOrigin-RevId: 633876272 --- tensorflow/core/ops/ops.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index a15dada136af38..2073f2e00ecd81 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -1,4 +1,4 @@ -go/nodeserialize +go/debugonly op { name: "Abort" attr { From e3f99f4f953f870a4695718b280fdf232088288b Mon Sep 17 00:00:00 2001 From: Artsiom Ablavatski Date: Wed, 15 May 2024 03:25:10 -0700 Subject: [PATCH 171/478] Introduce reference implementation of DRQ (dynamically quantized) TransposeConv with per-channel quantization PiperOrigin-RevId: 633877445 --- .../internal/reference/transpose_conv.h | 97 ++++++++++ tensorflow/lite/kernels/transpose_conv.cc | 179 +++++++++++++++++- .../lite/kernels/transpose_conv_test.cc | 96 ++++++++++ 3 files changed, 364 insertions(+), 8 deletions(-) diff --git a/tensorflow/lite/kernels/internal/reference/transpose_conv.h b/tensorflow/lite/kernels/internal/reference/transpose_conv.h index 8a51e0fa5e9742..744ed0f826b335 100644 --- a/tensorflow/lite/kernels/internal/reference/transpose_conv.h +++ b/tensorflow/lite/kernels/internal/reference/transpose_conv.h @@ -219,6 +219,103 @@ inline void TransposeConv( } } +inline void HybridTransposeConv( + const ConvParams& params, float* scaling_factors_ptr, + const RuntimeShape& input_shape, const int8_t* input_data, + const RuntimeShape& filter_shape, const int8_t* filter_data, + const RuntimeShape& bias_shape, const float* bias_data, + const RuntimeShape& output_shape, float* output_data, + const float* per_channel_scale, int32_t* input_offset) { + const int stride_width = params.stride_width; + const int stride_height = params.stride_height; + const int pad_width = params.padding_values.width; + const int pad_height = params.padding_values.height; + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3); + const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + const float output_activation_min = params.float_activation_min; + const float output_activation_max = params.float_activation_max; + if (bias_data) { + TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); + } + + // Although transpose convolution simplifies to convolution with transposed + // weights for strides of 1, non-unitary striding complicates matters. To + // keep this reference implementation as clear as possible, we use a + // "scatter" access pattern, where we loop through all the input elements, + // computing their influence on the output, rather than looping through the + // output elements in the typical "gather" access pattern of a conv. We + // therefore must initialize the output array to zero. + const int num_elements = output_shape.FlatSize(); + for (int i = 0; i < num_elements; i++) { + output_data[i] = 0.0f; + } + + // Loop through input elements one at a time. 
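+  // A minimal summary of the loop nest below: each batch uses its own
+  // scaling factor and zero point, and every product
+  // (input - input_offset[batch]) * filter is scattered into the output,
+  // rescaled by per_channel_scale[out_channel] * scaling_factor.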
+ for (int batch = 0; batch < batches; ++batch) { + const float scaling_factor = scaling_factors_ptr[batch]; + for (int in_y = 0; in_y < input_height; ++in_y) { + for (int in_x = 0; in_x < input_width; ++in_x) { + for (int in_channel = 0; in_channel < input_depth; ++in_channel) { + // Loop through the output elements it will influence + const int out_x_origin = (in_x * stride_width) - pad_width; + const int out_y_origin = (in_y * stride_height) - pad_height; + for (int filter_y = 0; filter_y < filter_height; ++filter_y) { + for (int filter_x = 0; filter_x < filter_width; ++filter_x) { + for (int out_channel = 0; out_channel < output_depth; + ++out_channel) { + // Compute output element location + const int out_x = out_x_origin + filter_x; + const int out_y = out_y_origin + filter_y; + // We cannot accumulate out of bounds + if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) && + (out_y < output_height)) { + int32_t input_value = input_data[Offset( + input_shape, batch, in_y, in_x, in_channel)]; + int32_t filter_value = + filter_data[Offset(filter_shape, out_channel, filter_y, + filter_x, in_channel)]; + int32_t acc = + (input_value - input_offset[batch]) * filter_value; + output_data[Offset(output_shape, batch, out_y, out_x, + out_channel)] += + acc * per_channel_scale[out_channel] * scaling_factor; + } + } + } + } + } + } + } + } + + for (int batch = 0; batch < batches; ++batch) { + for (int out_y = 0; out_y < output_height; ++out_y) { + for (int out_x = 0; out_x < output_width; ++out_x) { + for (int out_channel = 0; out_channel < output_depth; ++out_channel) { + float acc = output_data[Offset(output_shape, batch, out_y, out_x, + out_channel)]; + if (bias_data) acc += bias_data[out_channel]; + + output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] = + ActivationFunctionWithMinMax(acc, output_activation_min, + output_activation_max); + } + } + } + } +} + } // namespace reference_ops } // namespace tflite diff --git a/tensorflow/lite/kernels/transpose_conv.cc b/tensorflow/lite/kernels/transpose_conv.cc index 93c6df28890c9c..7d7c9a410ef451 100644 --- a/tensorflow/lite/kernels/transpose_conv.cc +++ b/tensorflow/lite/kernels/transpose_conv.cc @@ -26,8 +26,10 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/optimized/integer_ops/transpose_conv.h" #include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h" // NOLINTNEXTLINE - This header file shouldn't go to the top. +#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h" #include "tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h" #include "tensorflow/lite/kernels/internal/reference/reference_ops.h" +#include "tensorflow/lite/kernels/internal/reference/transpose_conv.h" #include "tensorflow/lite/kernels/internal/tensor.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/internal/types.h" @@ -59,6 +61,9 @@ struct OpData { int col2im_id = kTensorNotAllocated; int transposed_weights_id = kTensorNotAllocated; int scratch_tensor_id = kTensorNotAllocated; + int input_quantized_id = kTensorNotAllocated; + int scaling_factors_id = kTensorNotAllocated; + int input_offset_id = kTensorNotAllocated; // col2im is the temporary tensor allocated and used in optimized path for // storing col2im data:gemm result for input_matrix x filter_matrix. @@ -73,6 +78,11 @@ struct OpData { // results. int32_t scratch_tensor_index; + // Indexes are used for hybrid (dynamic range quantization) path. 
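+  // They refer to positions in node->temporaries and are assigned when the
+  // temporary tensors above are allocated.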
+ int32_t input_quantized_index; + int32_t scaling_factors_index; + int32_t input_offset_index; + TfLitePaddingValues padding; // The scaling factor from input to output (aka the 'real multiplier') can // be represented as a fixed point multiplier plus a left shift. @@ -160,6 +170,32 @@ static TfLiteStatus AllocateTemporaryTensorsIfRequired(TfLiteContext* context, ++temporaries_count; } + if (input_type == kTfLiteFloat32 && weights_type == kTfLiteInt8) { + // Allocate tensor to store the on-the-fly quantized inputs. + data->input_quantized_index = temporaries_count; + if (data->input_quantized_id == kTensorNotAllocated) { + TF_LITE_ENSURE_OK( + context, context->AddTensors(context, 1, &data->input_quantized_id)); + } + ++temporaries_count; + + // Allocate tensor to store the quantization params computed during + // on-the-fly input quantization. + data->scaling_factors_index = temporaries_count; + if (data->scaling_factors_id == kTensorNotAllocated) { + TF_LITE_ENSURE_OK( + context, context->AddTensors(context, 1, &data->scaling_factors_id)); + } + ++temporaries_count; + + data->input_offset_index = temporaries_count; + if (data->input_offset_id == kTensorNotAllocated) { + TF_LITE_ENSURE_OK( + context, context->AddTensors(context, 1, &data->input_offset_id)); + } + ++temporaries_count; + } + TfLiteIntArrayFree(node->temporaries); node->temporaries = TfLiteIntArrayCreate(temporaries_count); @@ -308,8 +344,6 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { bias->type == params->quantized_bias_type); data->quantized_bias_type = params->quantized_bias_type; } - } else { - TF_LITE_ENSURE_TYPES_EQ(context, weights->type, input->type); } TF_LITE_ENSURE_TYPES_EQ(context, output->type, input->type); // Ensure that weights and inputs have the same channel dimension. @@ -406,6 +440,69 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { data->per_channel_output_shift.data(), channels_out)); } + if (input->type == kTfLiteFloat32 && weights->type == kTfLiteInt8) { + node->temporaries->data[data->input_quantized_index] = + data->input_quantized_id; + TfLiteTensor* input_quantized; + TF_LITE_ENSURE_OK( + context, GetTemporarySafe(context, node, data->input_quantized_index, + &input_quantized)); + input_quantized->type = kTfLiteInt8; + input_quantized->allocation_type = kTfLiteArenaRw; + if (!TfLiteIntArrayEqual(input_quantized->dims, input->dims)) { + TfLiteIntArray* input_quantized_size = TfLiteIntArrayCopy(input->dims); + TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, input_quantized, + input_quantized_size)); + } + + node->temporaries->data[data->scaling_factors_index] = + data->scaling_factors_id; + TfLiteTensor* scaling_factors; + TF_LITE_ENSURE_OK( + context, GetTemporarySafe(context, node, data->scaling_factors_index, + &scaling_factors)); + scaling_factors->type = kTfLiteFloat32; + scaling_factors->allocation_type = kTfLiteArenaRw; + // Only one scale factor per batch is typically necessary. See optimized + // implementation for why we need to allocate for the height of the inputs + // flattened to 2D. 
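+    // Here that height is NumElements(input) / channels_in, i.e. the product
+    // of all non-channel dimensions of the input.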
+ const int channels_in = weights->dims->data[3]; + TF_LITE_ENSURE(context, channels_in != 0); + const int height = NumElements(input) / channels_in; + int scaling_dims[1] = {height}; + if (!TfLiteIntArrayEqualsArray(scaling_factors->dims, 1, scaling_dims)) { + TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1); + scaling_factors_size->data[0] = height; + TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scaling_factors, + scaling_factors_size)); + } + + const auto* affine_quantization = + reinterpret_cast( + weights->quantization.params); + TF_LITE_ENSURE(context, affine_quantization); + TF_LITE_ENSURE(context, affine_quantization->scale); + TF_LITE_ENSURE_EQ( + context, affine_quantization->scale->size, + weights->dims->data[affine_quantization->quantized_dimension]); + node->temporaries->data[data->input_offset_index] = data->input_offset_id; + TfLiteTensor* input_offsets; + TF_LITE_ENSURE_OK(context, + GetTemporarySafe(context, node, data->input_offset_index, + &input_offsets)); + input_offsets->type = kTfLiteInt32; + input_offsets->allocation_type = kTfLiteArenaRw; + // See above comment for the need to allocate for height of inputs. + TF_LITE_ENSURE(context, channels_in != 0); + const int input_offset_dims[1] = {height}; + if (!TfLiteIntArrayEqualsArray(input_offsets->dims, 1, input_offset_dims)) { + TfLiteIntArray* input_offsets_size = TfLiteIntArrayCreate(1); + input_offsets_size->data[0] = input_offset_dims[0]; + TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, input_offsets, + input_offsets_size)); + } + } + return kTfLiteOk; } @@ -617,6 +714,67 @@ void EvalQuantizedPerChannel16x8( } } +TfLiteStatus EvalHybrid(TfLiteContext* context, TfLiteNode* node, + const TfLiteTransposeConvParams* params, OpData* data, + const TfLiteTensor* input, const TfLiteTensor* weights, + const TfLiteTensor* bias, TfLiteTensor* output) { + float output_activation_min, output_activation_max; + CalculateActivationRange(params->activation, &output_activation_min, + &output_activation_max); + + const int batch_size = SizeOfDimension(input, 0); + TF_LITE_ENSURE(context, batch_size != 0); + const int input_size = NumElements(input) / batch_size; + TfLiteTensor* quantized_input_tensor; + TF_LITE_ENSURE_OK(context, + GetTemporarySafe(context, node, data->input_quantized_index, + &quantized_input_tensor)); + int8_t* quantized_input_ptr_batch = + GetTensorData(quantized_input_tensor); + TfLiteTensor* scaling_factors_tensor; + TF_LITE_ENSURE_OK(context, + GetTemporarySafe(context, node, data->scaling_factors_index, + &scaling_factors_tensor)); + float* scaling_factors_ptr = GetTensorData(scaling_factors_tensor); + TfLiteTensor* input_offset_tensor; + TF_LITE_ENSURE_OK(context, + GetTemporarySafe(context, node, data->input_offset_index, + &input_offset_tensor)); + int32_t* input_offset_ptr = GetTensorData(input_offset_tensor); + + for (int b = 0; b < batch_size; ++b) { + const int offset = b * input_size; + tensor_utils::AsymmetricQuantizeFloats( + GetTensorData(input) + offset, input_size, + quantized_input_ptr_batch + offset, &scaling_factors_ptr[b], + &input_offset_ptr[b]); + } + + const auto* affine_quantization = + reinterpret_cast(weights->quantization.params); + + tflite::ConvParams op_params; + op_params.padding_type = PaddingType::kSame; + op_params.padding_values.width = data->padding.width; + op_params.padding_values.height = data->padding.height; + op_params.padding_values.width_offset = data->padding.width_offset; + op_params.padding_values.height_offset = 
data->padding.height_offset; + op_params.stride_width = params->stride_width; + op_params.stride_height = params->stride_height; + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; + + reference_ops::HybridTransposeConv( + op_params, scaling_factors_ptr, GetTensorShape(input), + quantized_input_ptr_batch, GetTensorShape(weights), + GetTensorData(weights), GetTensorShape(bias), + GetTensorData(bias), GetTensorShape(output), + GetTensorData(output), affine_quantization->scale->data, + input_offset_ptr); + + return kTfLiteOk; +} + template TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { // Retrieve tensors (All should be allocated by now) @@ -677,14 +835,19 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { // Currently support float32, uint8, int8, int16. switch (input->type) { case kTfLiteFloat32: { - // Only for GenericOptimized path, we use transposed weights. - if (data->weights_are_transposed) { - if (!IsConstantTensor(weights)) { - ResizeAndTransposeWeights(context, weights, transposed_weights); + if (weights->type == kTfLiteInt8) { + TF_LITE_ENSURE_OK(context, EvalHybrid(context, node, params, data, + input, weights, bias, output)); + } else { + // Only for GenericOptimized path, we use transposed weights. + if (data->weights_are_transposed) { + if (!IsConstantTensor(weights)) { + ResizeAndTransposeWeights(context, weights, transposed_weights); + } } + EvalFloat(context, params, data, input, weights, bias, + transposed_weights, col2im, output); } - EvalFloat(context, params, data, input, weights, bias, - transposed_weights, col2im, output); break; } case kTfLiteUInt8: { diff --git a/tensorflow/lite/kernels/transpose_conv_test.cc b/tensorflow/lite/kernels/transpose_conv_test.cc index 84dfd29dfb9945..bbe7d3f022c39b 100644 --- a/tensorflow/lite/kernels/transpose_conv_test.cc +++ b/tensorflow/lite/kernels/transpose_conv_test.cc @@ -1251,6 +1251,102 @@ TEST_P(TransposeConvOpTest, SimpleBiasTestQuantizedPerChannel16x8Bias64) { EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({1, 2, 3, 2})); } +class HybridTransposeConvOpModel + : public BaseTransposeConvBiasOpModel { + public: + using BaseTransposeConvBiasOpModel::BaseTransposeConvBiasOpModel; + + void SetFilter(std::initializer_list f) { + PerChannelSymmetricQuantizeAndPopulate(filter_, f); + } + + void SetBias(std::initializer_list b) { PopulateTensor(bias_, b); } + + std::vector GetOutput() { return ExtractVector(output_); } +}; + +TEST_P(TransposeConvOpTest, SimpleTestHybridInt8) { + const std::initializer_list filter_data = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + const std::initializer_list const_filter_data = {14, 28, 42, 56, 71, + 85, 99, 113, 127}; + HybridTransposeConvOpModel model( + /*registration=*/GetRegistration(), /*output_shape_data=*/{1, 4, 4, 1}, + /*filter=*/ + {TensorType_INT8, {1, 3, 3, 1}, 0, 0, 0, 0, true, {9.0 / 127}, {0}, 0}, + /*filter_data=*/const_filter_data, + /*input=*/{TensorType_FLOAT32, {1, 4, 4, 1}}, + /*output=*/{TensorType_FLOAT32, {}}, + /*padding=*/Padding_SAME, /*stride_w=*/1, /*stride_h=*/1, + /*fused_activation=*/ActivationFunctionType_NONE, + /*test_type=*/GetTestType(), + /*version=*/3, + /*bias_type=*/TensorType_FLOAT32); + model.SetInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}); + if (GetTestType() == TestType::kDynamic) { + model.SetFilter(filter_data); + } + + model.SetBias({1}); + ASSERT_EQ(model.Invoke(), kTfLiteOk); + + // The values are taken from float model "SimpleTest". 
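+  // The tolerance is loose because the hybrid path quantizes the inputs to
+  // int8 on the fly.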
+ EXPECT_THAT(model.GetOutput(), ElementsAreArray(ArrayFloatNear( + {30, 63, 84, 76, 100, 193, 238, 199, 208, + 373, 417.5, 331, 263.7, 447, 486, 366.5}, + 0.19))); + + // GetOutputShape() should always be same as model.SetOutputShape(...); + EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({1, 4, 4, 1})); +} + +TEST_P(TransposeConvOpTest, SimpleTestHybridInt8MultiChannel) { + const std::initializer_list filter_data = { + 1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18}; + const std::initializer_list const_filter_data = { + 7, 22, 37, 52, 67, 82, 97, 112, 127, + 14, 28, 42, 56, 71, 85, 99, 113, 127}; + HybridTransposeConvOpModel model( + /*registration=*/GetRegistration(), /*output_shape_data=*/{1, 5, 5, 2}, + /*filter=*/ + {TensorType_INT8, + {2, 3, 3, 1}, + 0, + 0, + 0, + 0, + true, + {17.0 / 127, 18.0 / 127}, + {0, 0}, + 0}, + /*filter_data=*/const_filter_data, + /*input=*/{TensorType_FLOAT32, {1, 2, 2, 1}}, + /*output=*/{TensorType_FLOAT32, {}}, + /*padding=*/Padding_VALID, /*stride_w=*/2, /*stride_h=*/2, + /*fused_activation=*/ActivationFunctionType_NONE, + /*test_type=*/GetTestType(), + /*version=*/3, + /*bias_type=*/TensorType_FLOAT32); + + model.SetInput({1, 2, 3, 4}); + if (GetTestType() == TestType::kDynamic) { + model.SetFilter(filter_data); + } + model.SetBias({3, 4}); + + ASSERT_EQ(model.Invoke(), kTfLiteOk); + + // The values are taken from float model "MultiChannelBiasTest". + EXPECT_THAT( + model.GetOutput(), + ElementsAreArray(ArrayFloatNear( + {4, 6, 6, 8, 10, 14, 9, 12, 13, 16, 10, 12, 12, + 14, 28, 32, 21, 24, 25, 28, 19, 24, 27, 32, 64.5, 76, + 44.5, 52, 56.5, 63.5, 24, 28, 30, 34, 63.5, 72, 39, 44, 47, + 52, 42, 46, 48, 52, 106, 114, 63, 68, 71, 76}, + 0.26))); + EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({1, 5, 5, 2})); +} + INSTANTIATE_TEST_SUITE_P( TransposeConvOpTest, TransposeConvOpTest, ::testing::Combine( From 382d00d7b4e250586c617f0c0b59644867c4e149 Mon Sep 17 00:00:00 2001 From: gaurides Date: Wed, 15 May 2024 03:40:54 -0700 Subject: [PATCH 172/478] PR #12436: [XLA:CPU][oneDNN] Enable Matmul + BiasAdd + Elu / Tanh / Relu6 fusions for onednn Imported from GitHub PR https://github.com/openxla/xla/pull/12436 This PR enables MatMul fusions with different activations viz. Elu, Tanh and Relu6. 
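On the oneDNN side, each fused activation becomes an eltwise post-op on the
matmul primitive. A sketch of the mapping (taken from the onednn_matmul.cc
change in this patch; `post_ops` is the dnnl::post_ops list built in
CreateMatMulPrimDesc):

    // ELU with alpha = 1.0.
    post_ops.append_eltwise(dnnl::algorithm::eltwise_elu, 1.0f, 0.0f);
    // RELU6, expressed as a clip to the range [0, 6].
    post_ops.append_eltwise(dnnl::algorithm::eltwise_clip_v2, 0.f, 6.0f);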
Copybara import of the project: -- 9895b022aa66ee0f1040d0f6e557382c9dd77b99 by Gauri1 Deshpande : Enable Matmul + BiasAdd + Elu / Tanh / Relu6 fusions for onednn -- c277c70da3a1bbe05d0d637ec2d4e9ce220f7f3f by Gauri1 Deshpande : address review comments - sort name, remove default layout and remove default attributes Merging this change closes #12436 PiperOrigin-RevId: 633881207 --- third_party/xla/xla/service/cpu/BUILD | 11 + .../xla/xla/service/cpu/backend_config.proto | 2 + .../xla/xla/service/cpu/onednn_matmul.cc | 6 + .../xla/service/cpu/onednn_matmul_rewriter.cc | 122 +++++--- .../xla/service/cpu/onednn_ops_rewriter.cc | 21 +- .../xla/service/cpu/onednn_pattern_utils.h | 60 ++++ third_party/xla/xla/service/pattern_matcher.h | 1 + .../xla/xla/tests/onednn_matmul_test.cc | 275 ++++++++++++++++++ 8 files changed, 451 insertions(+), 47 deletions(-) create mode 100644 third_party/xla/xla/service/cpu/onednn_pattern_utils.h diff --git a/third_party/xla/xla/service/cpu/BUILD b/third_party/xla/xla/service/cpu/BUILD index 1d0defbdcced10..3f2bdc0f211410 100644 --- a/third_party/xla/xla/service/cpu/BUILD +++ b/third_party/xla/xla/service/cpu/BUILD @@ -1658,6 +1658,15 @@ cc_library( ] + mkl_deps(), ) +cc_library( + name = "onednn_pattern_utils", + hdrs = ["onednn_pattern_utils.h"], + visibility = ["//visibility:public"], + deps = [ + ":onednn_util", + ] + mkl_deps(), +) + cc_library( name = "onednn_matmul_rewriter", srcs = ["onednn_matmul_rewriter.cc"], @@ -1671,6 +1680,7 @@ cc_library( ":backend_config_proto_cc", ":onednn_matmul", ":onednn_memory_util", + ":onednn_pattern_utils", ":onednn_util", "//xla:executable_run_options", "//xla:shape_util", @@ -1699,6 +1709,7 @@ cc_library( deps = [ ":backend_config_proto_cc", ":onednn_memory_util", + ":onednn_pattern_utils", ":onednn_util", "//xla:status_macros", "//xla:xla_data_proto_cc", diff --git a/third_party/xla/xla/service/cpu/backend_config.proto b/third_party/xla/xla/service/cpu/backend_config.proto index d5ce59285e55c9..b63bcd5c376014 100644 --- a/third_party/xla/xla/service/cpu/backend_config.proto +++ b/third_party/xla/xla/service/cpu/backend_config.proto @@ -27,6 +27,8 @@ message OneDnnMatMulConfig { GELU_TANH = 5; BINARY_ADD = 6; LINEAR = 7; + ELU = 8; + RELU6 = 9; } repeated FusionKind fused_ops = 3; bool bias_broadcast = 4; diff --git a/third_party/xla/xla/service/cpu/onednn_matmul.cc b/third_party/xla/xla/service/cpu/onednn_matmul.cc index 04e0bd47a2fb00..4a6484ec07d405 100644 --- a/third_party/xla/xla/service/cpu/onednn_matmul.cc +++ b/third_party/xla/xla/service/cpu/onednn_matmul.cc @@ -138,6 +138,9 @@ std::unique_ptr CreateMatMulPrimDesc( case OneDnnMatMulConfig::GELU_ERF: post_ops.append_eltwise(dnnl::algorithm::eltwise_gelu_erf, 0.f, 0.f); break; + case OneDnnMatMulConfig::RELU6: + post_ops.append_eltwise(dnnl::algorithm::eltwise_clip_v2, 0.f, 6.0f); + break; case OneDnnMatMulConfig::BIAS: { bias_md = fused_mds.at(fused_operand_idx); // Extend bias rank to match result rank. 
@@ -156,6 +159,9 @@ std::unique_ptr CreateMatMulPrimDesc( } fused_operand_idx++; } break; + case OneDnnMatMulConfig::ELU: + post_ops.append_eltwise(dnnl::algorithm::eltwise_elu, 1.0f, 0.0f); + break; case OneDnnMatMulConfig::BINARY_ADD: { auto binary_md = fused_mds.at(fused_operand_idx); if (fused_operands_ref) { diff --git a/third_party/xla/xla/service/cpu/onednn_matmul_rewriter.cc b/third_party/xla/xla/service/cpu/onednn_matmul_rewriter.cc index cc97076a509af6..a792a5ba52130d 100644 --- a/third_party/xla/xla/service/cpu/onednn_matmul_rewriter.cc +++ b/third_party/xla/xla/service/cpu/onednn_matmul_rewriter.cc @@ -28,6 +28,7 @@ limitations under the License. #include "xla/service/cpu/backend_config.pb.h" #include "xla/service/cpu/onednn_matmul.h" #include "xla/service/cpu/onednn_memory_util.h" +#include "xla/service/cpu/onednn_pattern_utils.h" #include "xla/service/cpu/onednn_util.h" #include "xla/service/hlo_cost_analysis.h" #include "xla/service/pattern_matcher.h" @@ -40,6 +41,7 @@ namespace cpu { namespace { namespace m = match; +namespace pu = ::xla::cpu::onednn_pattern_utils_internal; inline Status ValidateDotDimensionNumbers( const DotDimensionNumbers& dim_numbers) { @@ -63,26 +65,6 @@ inline bool CompatibleElementType(const HloInstruction* instr) { return element_type == BF16 || element_type == F32 || element_type == F16; } -// Type conversion from and to any of BF16 and FP32. -// TODO(intel-tf): Support more types when enabled. -template -inline auto SupportedConvert(Pattern pattern) { - auto supported_convert = [](const HloInstruction* instr) -> bool { - return CompatibleElementType(instr) && - CompatibleElementType(instr->operand(0)); - }; - return m::Convert(pattern).WithPredicate(supported_convert); -} - -template -inline auto SupportedConvert(HloInstruction** convert, Pattern pattern) { - auto supported_convert = [](const HloInstruction* instr) -> bool { - return CompatibleElementType(instr) && - CompatibleElementType(instr->operand(0)); - }; - return m::Convert(convert, pattern).WithPredicate(supported_convert); -} - inline bool IsRowMajor(const Shape& shape) { return LayoutUtil::IsMonotonicWithDim0Major(shape.layout()); } @@ -117,9 +99,9 @@ auto ElementwiseSafeIntermediates(HloInstruction** instr, m::Slice(instr, pattern.WithOneUser()), m::Bitcast(instr, pattern.WithOneUser()), m::Reshape(instr, pattern.WithOneUser()), - SupportedConvert(instr, pattern.WithOneUser()), - SupportedConvert(instr, BitcastWithReshapeSemantics( - optional_bitcast, pattern.WithOneUser())), + pu::SupportedConvert(instr, pattern.WithOneUser()), + pu::SupportedConvert(instr, BitcastWithReshapeSemantics( + optional_bitcast, pattern.WithOneUser())), pattern); } @@ -140,6 +122,10 @@ inline auto BcastConstScalar(double value) { return BcastConstScalar(nullptr, value); } +inline auto BcastConvertConstScalar(double value) { + return m::Broadcast(pu::OptionalConvert(m::ConstantScalar(value))); +} + inline bool IsBatchDot(const HloInstruction& instr) { if (auto* dot_instr = DynCast(&instr)) { return dot_instr->dot_dimension_numbers().lhs_batch_dimensions_size() > 0; @@ -368,7 +354,7 @@ inline auto OptionalConvertAndBitcast(HloInstruction** optional_convert, // 3. pattern-root -> bitcast // 4. 
pattern-root auto common = m::AnyOf( - SupportedConvert(optional_convert, std::move(pattern).WithOneUser()) + pu::SupportedConvert(optional_convert, std::move(pattern).WithOneUser()) .WithOperand(0, m::Op().WithElementType(PrimitiveType::BF16)) .WithElementType(PrimitiveType::F32), std::move(pattern).WithOneUser()); @@ -562,7 +548,8 @@ class OneDnnMatMulRewriteVisitor : public DfsHloRewriteVisitor { } // Validate addend for fusion. - if (CompatibleElementType(addend) && IsOperandFusible(addend, dot)) { + if (IsSupportedType(addend->shape().element_type()) && + IsOperandFusible(addend, dot)) { new_operands.push_back(addend); } else { return OkStatus(); @@ -647,6 +634,71 @@ class OneDnnMatMulRewriteVisitor : public DfsHloRewriteVisitor { return OkStatus(); } + auto ELUActivation(HloInstruction* instr, HloInstruction** src) { + // Reference: tensorflow/compiler/tf2xla/kernels/elu_op.cc + // const auto zero = ScalarLike(x, 0); + // const auto pred = Gt(x, zero); + // const auto expm1 = Expm1(x); + // return Select(pred, x, expm1); + auto pattern = m::Select( + m::Gt(pu::OptionalConvert(m::Op(src)), BcastConvertConstScalar(0)), + m::Op(src), + pu::OptionalConvert(m::Expm1(pu::OptionalConvert(m::Op(src))))); + return Match(instr, pattern); + } + + Status HandleSelect(HloInstruction* instr) override { + HloInstruction* matmul_call; + HloInstruction* intermediate_instr = nullptr; + HloInstruction* optional_bitcast = nullptr; + HloInstruction* src; + // Attempt to elide ELU subgraph and fuse ELU activation into GEMM, + // including when slicing or bitcasting is applied to the result. + if (ELUActivation(instr, &src)) { + if (Match(src, ElementwiseSafeIntermediates( + &intermediate_instr, &optional_bitcast, + OneDnnMatmulInstr(&matmul_call)))) { + return FuseActivation(OneDnnMatMulConfig::ELU, instr, matmul_call, + intermediate_instr); + } + } + return OkStatus(); + } + + Status HandleTanh(HloInstruction* instr) override { + HloInstruction* matmul_call; + HloInstruction* intermediate_instr = nullptr; + HloInstruction* optional_bitcast = nullptr; + // Attempt to elide Tanh and fuse Tanh activation into GEMM, including + // when slicing or bitcasting is applied to the result. + if (Match(instr, m::Tanh(ElementwiseSafeIntermediates( + &intermediate_instr, &optional_bitcast, + OneDnnMatmulInstr(&matmul_call)) + .WithOneUser()))) { + return FuseActivation(OneDnnMatMulConfig::TANH, instr, matmul_call, + intermediate_instr); + } + return OkStatus(); + } + + Status HandleClamp(HloInstruction* instr) override { + HloInstruction* matmul_call; + HloInstruction* intermediate_instr = nullptr; + HloInstruction* optional_bitcast = nullptr; + // Attempt to elide RELU6 and fuse RELU6 activation into GEMM, including + // when slicing or bitcasting is applied to the result. 
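+    // Relu6(x) == clamp(0, x, 6), so the pattern below is a clamp whose
+    // bounds are broadcast scalar constants 0 and 6.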
+ if (Match(instr, m::Clamp(BcastConstScalar(0), + ElementwiseSafeIntermediates( + &intermediate_instr, &optional_bitcast, + OneDnnMatmulInstr(&matmul_call)) + .WithOneUser(), + BcastConstScalar(6)))) { + return FuseActivation(OneDnnMatMulConfig::RELU6, instr, matmul_call, + intermediate_instr); + } + return OkStatus(); + } + Status HandleMultiply(HloInstruction* instr) override { HloInstruction* matmul_call; HloInstruction* intermediate_instr = nullptr; @@ -664,16 +716,16 @@ class OneDnnMatMulRewriteVisitor : public DfsHloRewriteVisitor { HloInstruction *dot, *constant; HloInstruction* optional_convert = nullptr; - auto pattern = - m::Op(&instr) - .WithOpcode(HloOpcode::kMultiply) - .WithBinaryOperandsAnyOrder( - m::AnyOf( - SupportedConvert(&optional_convert, OneDnnMatmulInstr(&dot)) - .WithElementType(PrimitiveType::F32), - OneDnnMatmulInstr(&dot)) - .WithOneUser(), - m::Broadcast(m::Constant(&constant))); + auto pattern = m::Op(&instr) + .WithOpcode(HloOpcode::kMultiply) + .WithBinaryOperandsAnyOrder( + m::AnyOf( + pu::SupportedConvert(&optional_convert, + OneDnnMatmulInstr(&dot)) + .WithElementType(PrimitiveType::F32), + OneDnnMatmulInstr(&dot)) + .WithOneUser(), + m::Broadcast(m::Constant(&constant))); if (Match(instr, pattern)) { std::vector new_operands; diff --git a/third_party/xla/xla/service/cpu/onednn_ops_rewriter.cc b/third_party/xla/xla/service/cpu/onednn_ops_rewriter.cc index 058355223cd5ee..06c2137a782d50 100644 --- a/third_party/xla/xla/service/cpu/onednn_ops_rewriter.cc +++ b/third_party/xla/xla/service/cpu/onednn_ops_rewriter.cc @@ -19,6 +19,7 @@ limitations under the License. #include "xla/hlo/ir/hlo_instruction.h" #include "xla/service/cpu/backend_config.pb.h" #include "xla/service/cpu/onednn_memory_util.h" +#include "xla/service/cpu/onednn_pattern_utils.h" #include "xla/service/cpu/onednn_util.h" #include "xla/service/pattern_matcher.h" #include "xla/status_macros.h" @@ -28,11 +29,7 @@ namespace cpu { namespace { namespace m = match; - -template -auto OptionalConvert(Pattern pattern) { - return m::AnyOf(m::Convert(pattern), std::move(pattern)); -} +namespace pu = ::xla::cpu::onednn_pattern_utils_internal; inline auto OneDnnConvertibleInstr(HloInstruction** instr) { return m::AnyOf(m::CustomCall(instr, {"__onednn$layernorm"}), @@ -101,9 +98,9 @@ std::optional MatchSoftmax(HloInstruction* instr) { if (!Match(instr, m::Divide( m::Exp(&left_exponential, m::Op()), - m::Broadcast(m::Reshape( - m::Broadcast(OptionalConvert(m::Reshape(OptionalConvert( - m::Reduce(OptionalConvert( + m::Broadcast(m::Reshape(m::Broadcast( + pu::OptionalConvert(m::Reshape(pu::OptionalConvert( + m::Reduce(pu::OptionalConvert( m::Exp(&right_exponential, m::Op())), m::Op()) .WithPredicate([](const HloInstruction* reduce) { @@ -256,7 +253,7 @@ bool MatchFlaxLayerNorm(HloInstruction* instr, HloInstruction** src, .WithBinaryOperandsAnyOrder( m::Op(&hinge).WithOneUser(), m::Subtract( - OptionalConvert(m::Op(&prod_s)), + pu::OptionalConvert(m::Op(&prod_s)), m::Broadcast( m::Reshape( m::Broadcast(m::Reshape(m::Op(&div_red).WithOpcode( @@ -326,8 +323,8 @@ bool MatchFlaxLayerNorm(HloInstruction* instr, HloInstruction** src, auto div_red_mul_src = m::Divide() .WithOperand(0, m::Reduce(m::Multiply().WithBinaryOperandsAnyOrder( - OptionalConvert(m::Op(&mul_in0)), - OptionalConvert(m::Op(&mul_in1))), + pu::OptionalConvert(m::Op(&mul_in0)), + pu::OptionalConvert(m::Op(&mul_in1))), m::Constant()) .WithPredicate([](const HloInstruction* reduce) { HloComputation* reducer = reduce->to_apply(); @@ -348,7 +345,7 @@ 
bool MatchFlaxLayerNorm(HloInstruction* instr, HloInstruction** src, m::Divide() .WithOperand( 0, - m::Reduce(OptionalConvert(m::Op(&reduce_in0)), m::Constant()) + m::Reduce(pu::OptionalConvert(m::Op(&reduce_in0)), m::Constant()) .WithPredicate([](const HloInstruction* reduce) { HloComputation* reducer = reduce->to_apply(); return (reducer->root_instruction()->opcode() == diff --git a/third_party/xla/xla/service/cpu/onednn_pattern_utils.h b/third_party/xla/xla/service/cpu/onednn_pattern_utils.h new file mode 100644 index 00000000000000..6635d79d115938 --- /dev/null +++ b/third_party/xla/xla/service/cpu/onednn_pattern_utils.h @@ -0,0 +1,60 @@ +/* Copyright 2024 The OpenXLA Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef XLA_SERVICE_CPU_ONEDNN_PATTERN_UTILS_H_ +#define XLA_SERVICE_CPU_ONEDNN_PATTERN_UTILS_H_ +#if defined(INTEL_MKL) && defined(ENABLE_ONEDNN_V3) + +#include "xla/hlo/ir/hlo_instruction.h" +#include "xla/hlo/ir/hlo_instructions.h" +#include "xla/service/cpu/onednn_util.h" +#include "xla/service/pattern_matcher.h" + +namespace xla { +namespace cpu { + +namespace onednn_pattern_utils_internal { +namespace m = match; + +template +auto OptionalConvert(Pattern pattern) { + return m::AnyOf(m::Convert(pattern), std::move(pattern)); +} + +// Type conversion from and to any of BF16 and FP32. +// TODO(intel-tf): Support more types when enabled. 
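+// A convert qualifies only when both its operand and result element types
+// pass IsSupportedType. Usage sketch, as in the matmul rewriter:
+//   pu::SupportedConvert(&optional_convert, OneDnnMatmulInstr(&dot))
+//       .WithElementType(PrimitiveType::F32)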
+template +inline auto SupportedConvert(Pattern pattern) { + auto supported_convert = [](const HloInstruction* instr) -> bool { + return IsSupportedType(instr->shape().element_type()) && + IsSupportedType(instr->operand(0)->shape().element_type()); + }; + return m::Convert(pattern).WithPredicate(supported_convert); +} + +template +inline auto SupportedConvert(HloInstruction** convert, Pattern pattern) { + auto supported_convert = [](const HloInstruction* instr) -> bool { + return IsSupportedType(instr->shape().element_type()) && + IsSupportedType(instr->operand(0)->shape().element_type()); + }; + return m::Convert(convert, pattern).WithPredicate(supported_convert); +} +} // namespace onednn_pattern_utils_internal +} // namespace cpu +} // namespace xla + +#endif // INTEL_MKL && ENABLE_ONEDNN_V3 +#endif // XLA_SERVICE_CPU_ONEDNN_PATTERN_UTILS_H_ diff --git a/third_party/xla/xla/service/pattern_matcher.h b/third_party/xla/xla/service/pattern_matcher.h index 477a63e2a6f91d..b17c53a9baf699 100644 --- a/third_party/xla/xla/service/pattern_matcher.h +++ b/third_party/xla/xla/service/pattern_matcher.h @@ -2687,6 +2687,7 @@ XLA_UNOP_PATTERN(CollectivePermuteStart) XLA_UNOP_PATTERN(CollectivePermuteDone) XLA_UNOP_PATTERN(Domain) XLA_UNOP_PATTERN(Exp) +XLA_UNOP_PATTERN(Expm1) XLA_UNOP_PATTERN(Fft) XLA_UNOP_PATTERN(Floor) XLA_UNOP_PATTERN(GetTupleElement) diff --git a/third_party/xla/xla/tests/onednn_matmul_test.cc b/third_party/xla/xla/tests/onednn_matmul_test.cc index 22089f177e3666..2248b05f5ddcdf 100644 --- a/third_party/xla/xla/tests/onednn_matmul_test.cc +++ b/third_party/xla/xla/tests/onednn_matmul_test.cc @@ -81,6 +81,34 @@ class MatmulTest : public HloTestBase { ; CHECK-DAG: } ; CHECK: } )"; + + const char* fused_matmul_bias_elu_rewrite_str_ = R"( + ; CHECK: custom_call_target="__onednn$matmul", + ; CHECK: backend_config={ + ; CHECK-DAG: "outer_dimension_partitions":[], + ; CHECK-DAG: "onednn_matmul_config":{ + ; CHECK-DAG: "fused_ops":["BIAS","ELU"] + ; CHECK-DAG: } + ; CHECK: } + )"; + const char* fused_matmul_bias_tanh_rewrite_str_ = R"( + ; CHECK: custom_call_target="__onednn$matmul", + ; CHECK: backend_config={ + ; CHECK-DAG: "outer_dimension_partitions":[], + ; CHECK-DAG: "onednn_matmul_config":{ + ; CHECK-DAG: "fused_ops":["BIAS","TANH"] + ; CHECK-DAG: } + ; CHECK: } + )"; + const char* fused_matmul_bias_relu6_rewrite_str_ = R"( + ; CHECK: custom_call_target="__onednn$matmul", + ; CHECK: backend_config={ + ; CHECK-DAG: "outer_dimension_partitions":[], + ; CHECK-DAG: "onednn_matmul_config":{ + ; CHECK-DAG: "fused_ops":["BIAS","RELU6"] + ; CHECK-DAG: } + ; CHECK: } + )"; }; TEST_F(MatmulTest, SimpleTestF32) { @@ -923,6 +951,117 @@ TEST_F(MatmulTest, TestF32ConstantWeights) { )"); } +// MM + BiasAdd + Elu : FP32 +TEST_F(MatmulTest, BiasAddELUFusion_F32) { + const char* matmul_module_str = R"( + HloModule matmul.test.f32 + + ENTRY matmul.test.f32 { + arg0.1 = f32[1024,1024] parameter(0) + arg1.2 = f32[1024,1024] parameter(1) + dot.3 = f32[1024,1024] dot(arg1.2, arg0.1), lhs_contracting_dims={1}, rhs_contracting_dims={0} + arg2.4 = f32[1024] parameter(2) + broadcast.5 = f32[1024,1024] broadcast(arg2.4), dimensions={1} + add.6 = f32[1024,1024] add(dot.3, broadcast.5) + constant.7 = f32[] constant(0) + broadcast.8 = f32[1024,1024] broadcast(constant.7), dimensions={} + compare.9 = pred[1024,1024] compare(add.6, broadcast.8), direction=GT + exponential-minus-one.10 = f32[1024,1024] exponential-minus-one(add.6) + ROOT select.11 = f32[1024,1024] select(compare.9, add.6, 
exponential-minus-one.10) + })"; + + EXPECT_TRUE(RunAndCompare(matmul_module_str, ErrorSpec{1e-4, 1e-4})); + MatchOptimizedHlo(matmul_module_str, fused_matmul_bias_elu_rewrite_str_); +} + +// MM + BiasAdd + Elu : BF16 +TEST_F(MatmulTest, BiasAddELUFusion_BF16) { + if (!IsSupportedType(PrimitiveType::BF16)) { + GTEST_SKIP() << "CPU does not support BF16."; + } + const char* matmul_module_str = R"( + HloModule matmul.test.bf16 + ENTRY matmul.test.bf16 { + arg0.1 = f32[1024,512] parameter(0) + convert.2 = bf16[1024,512] convert(arg0.1) + arg1.3 = f32[256,512] parameter(1) + convert.4 = bf16[256,512] convert(arg1.3) + dot.5 = bf16[1024,256] dot(convert.2, convert.4), lhs_contracting_dims={1}, rhs_contracting_dims={1} + convert.6 = f32[1024,256] convert(dot.5) + arg2.7 = f32[256] parameter(2) + broadcast.8 = f32[1024,256] broadcast(arg2.7), dimensions={1} + add.9 = f32[1024,256] add(convert.6, broadcast.8) + constant.10 = f32[] constant(0) + broadcast.11 = f32[1024,256] broadcast(constant.10), dimensions={} + compare.12 = pred[1024,256] compare(add.9, broadcast.11), direction=GT + convert.13 = bf16[1024,256] convert(add.9) + exponential-minus-one.14 = f32[1024,256] exponential-minus-one(add.9) + convert.15 = bf16[1024,256] convert(exponential-minus-one.14) + select.16 = bf16[1024,256] select(compare.12, convert.13, convert.15) + ROOT convert.17 = f32[1024,256] convert(select.16) + })"; + + EXPECT_TRUE(RunAndCompare(matmul_module_str, ErrorSpec{1e-2, 1e-2})); + MatchOptimizedHlo(matmul_module_str, fused_matmul_bias_elu_rewrite_str_); +} + +// MM + BiasAdd + Elu : F16 +TEST_F(MatmulTest, BiasAddELUFusion_F16) { + if (!IsSupportedType(PrimitiveType::F16)) { + GTEST_SKIP() << "CPU does not support F16."; + } + const char* matmul_module_str = R"( + HloModule matmul.test.f16 + + ENTRY matmul.test.f16 { + arg0.1 = f16[1024,1024] parameter(0) + arg1.2 = f16[1024,1024] parameter(1) + dot.3 = f16[1024,1024] dot(arg1.2, arg0.1), lhs_contracting_dims={1}, rhs_contracting_dims={0} + arg2.4 = f16[1024] parameter(2) + broadcast.5 = f16[1024,1024] broadcast(arg2.4), dimensions={1} + add.6 = f16[1024,1024] add(dot.3, broadcast.5) + constant.7 = f16[] constant(0) + broadcast.8 = f16[1024,1024] broadcast(constant.7), dimensions={} + compare.9 = pred[1024,1024] compare(add.6, broadcast.8), direction=GT + exponential-minus-one.10 = f16[1024,1024] exponential-minus-one(add.6) + ROOT select.11 = f16[1024,1024] select(compare.9, add.6, exponential-minus-one.10) + })"; + + EXPECT_TRUE(RunAndCompare(matmul_module_str, ErrorSpec{1e-2, 1e-2})); + MatchOptimizedHlo(matmul_module_str, fused_matmul_bias_elu_rewrite_str_); +} + +// MM + BiasAdd + Elu + MM : FP16_2 +TEST_F(MatmulTest, BiasAddELUFusion_F16_2) { + if (!IsSupportedType(PrimitiveType::F16)) { + GTEST_SKIP() << "CPU does not support F16."; + } + const char* matmul_module_str = R"( + HloModule matmul.test.f16 + + ENTRY matmul.test.f16 { + arg0.1 = f32[1024,1024] parameter(0) + convert.2 = f16[1024,1024] convert(arg0.1) + arg1.3 = f32[1024,1024] parameter(2) + convert.4 = f16[1024,1024] convert(arg1.3) + dot.5 = f16[1024,1024] dot(convert.2, convert.4), lhs_contracting_dims={1}, rhs_contracting_dims={0} + arg2.6 = f32[1024] parameter(1) + convert.7 = f16[1024] convert(arg2.6) + broadcast.8 = f16[1024,1024] broadcast(convert.7), dimensions={1} + add.9 = f16[1024,1024] add(dot.5, broadcast.8) + constant.10 = f16[] constant(0) + broadcast.11 = f16[1024,1024] broadcast(constant.10), dimensions={} + compare.12 = pred[1024,1024] compare(add.9, broadcast.11), direction=GT + 
exponential-minus-one.13 = f16[1024,1024] exponential-minus-one(add.9) + select.14 = f16[1024,1024] select(compare.12, add.9, exponential-minus-one.13) + dot.15 = f16[1024,1024] dot(select.14, convert.4), lhs_contracting_dims={1}, rhs_contracting_dims={0} + ROOT convert.16 = f32[1024,1024] convert(dot.15) + })"; + + EXPECT_TRUE(RunAndCompare(matmul_module_str, ErrorSpec{1e-2, 1e-2})); + MatchOptimizedHlo(matmul_module_str, fused_matmul_bias_elu_rewrite_str_); +} + TEST_F(MatmulTest, SimpleTestBF16Gemv1) { if (!IsSupportedType(PrimitiveType::BF16)) { GTEST_SKIP() << "CPU does not support BF16."; @@ -1010,6 +1149,142 @@ TEST_F(MatmulTest, SimpleTestF32WithMulAndAddFusion) { )"); } +TEST_F(MatmulTest, BiasAddTanhFusionTest_F32) { + const char* matmul_module_str = R"( + HloModule matmul.bias.tanh.test.f32 + ENTRY matmul.bias.tanh.test.f32 { + arg.0 = f32[32,32,40,30] parameter(0) + arg.1 = f32[32,32,30,40] parameter(1) + dot.2 = f32[32,32,40,40] dot(arg.0, arg.1), lhs_batch_dims={0,1}, lhs_contracting_dims={3}, rhs_batch_dims={0,1}, rhs_contracting_dims={2} + const.3 = f32[40] constant(15) + bcast.4 = f32[32,32,40,40] broadcast(const.3), dimensions={3} + add.5 = f32[32,32,40,40] add(dot.2, bcast.4) + tanh.6 = f32[32,32,40,40] tanh(add.5) + tuple.7 = (f32[32,32,40,40]) tuple(tanh.6) + ROOT get-tuple-element.8 = f32[32,32,40,40] get-tuple-element(tuple.7), index=0 + })"; + + EXPECT_TRUE(RunAndCompare(matmul_module_str, ErrorSpec{1e-4, 1e-4})); + MatchOptimizedHlo(matmul_module_str, fused_matmul_bias_tanh_rewrite_str_); +} + +TEST_F(MatmulTest, BiasAddTanhFusionTest_BF16) { + if (!IsSupportedType(PrimitiveType::BF16)) { + GTEST_SKIP() << "CPU does not support BF16."; + } + const char* matmul_module_str = R"( + HloModule matmul.bias.tanh.test.f32 + ENTRY matmul.bias.tanh.test.f32 { + arg0.1 = f32[1024,512] parameter(0) + convert.2 = bf16[1024,512] convert(arg0.1) + arg1.3 = f32[256,512] parameter(1) + convert.4 = bf16[256,512] convert(arg1.3) + dot.5 = bf16[1024,256] dot(convert.2, convert.4), lhs_contracting_dims={1}, rhs_contracting_dims={1} + convert.6 = f32[1024,256] convert(dot.5) + arg2.7 = f32[256] parameter(2) + broadcast.8 = f32[1024,256] broadcast(arg2.7), dimensions={1} + add.9 = f32[1024,256] add(convert.6, broadcast.8) + ROOT tanh.10 = f32[1024,256] tanh(add.9) + })"; + EXPECT_TRUE(RunAndCompare(matmul_module_str, ErrorSpec{1e-2, 1e-2})); + MatchOptimizedHlo(matmul_module_str, fused_matmul_bias_tanh_rewrite_str_); +} + +TEST_F(MatmulTest, BiasAddTanhFusionTest_F16) { + if (!IsSupportedType(PrimitiveType::F16)) { + GTEST_SKIP() << "CPU does not support F16."; + } + const char* matmul_module_str = R"( + HloModule matmul.bias.tanh.test.f16 + ENTRY matmul.bias.tanh.test.f16 { + arg0.1 = f16[1024,1024] parameter(0) + arg1.2 = f16[1024,1024] parameter(1) + dot.3 = f16[1024,1024] dot(arg1.2, arg0.1), lhs_contracting_dims={1}, rhs_contracting_dims={0} + arg2.4 = f16[1024] parameter(2) + broadcast.5 = f16[1024,1024] broadcast(arg2.4), dimensions={1} + add.6 = f16[1024,1024] add(dot.3, broadcast.5) + ROOT tanh.7 = f16[1024,1024] tanh(add.6) + })"; + + EXPECT_TRUE(RunAndCompare(matmul_module_str, ErrorSpec{1e-4, 1e-4})); + MatchOptimizedHlo(matmul_module_str, fused_matmul_bias_tanh_rewrite_str_); +} + +// Test MM + BiasAdd + Relu6 fusion : F32 +TEST_F(MatmulTest, BiasAddRelu6Fusion_F32) { + const char* matmul_module_str = R"( + HloModule matmul.bias.relu6.test.f32 + ENTRY matmul.bias.relu6.test.f32 { + constant.1 = f32[] constant(0) + broadcast.2 = f32[1024,1024] broadcast(constant.1), 
dimensions={}
+    arg1.3 = f32[1024,1024] parameter(1)
+    arg2.4 = f32[1024,1024] parameter(0)
+    dot.5 = f32[1024,1024] dot(arg1.3, arg2.4), lhs_contracting_dims={1}, rhs_contracting_dims={0}
+    arg3.6 = f32[1024] parameter(2)
+    broadcast.7 = f32[1024,1024] broadcast(arg3.6), dimensions={1}
+    add.8 = f32[1024,1024] add(dot.5, broadcast.7)
+    constant.9 = f32[] constant(6)
+    broadcast.10 = f32[1024,1024] broadcast(constant.9), dimensions={}
+    ROOT clamp.11 = f32[1024,1024] clamp(broadcast.2, add.8, broadcast.10)
+  })";
+
+  EXPECT_TRUE(RunAndCompare(matmul_module_str, ErrorSpec{1e-2, 1e-2}));
+  MatchOptimizedHlo(matmul_module_str, fused_matmul_bias_relu6_rewrite_str_);
+}
+
+// Test MM + BiasAdd + Relu6 fusion : BF16
+TEST_F(MatmulTest, BiasAddRelu6Fusion_BF16) {
+  if (!IsSupportedType(PrimitiveType::BF16)) {
+    GTEST_SKIP() << "CPU does not support BF16.";
+  }
+  const char* matmul_module_str = R"(
+  HloModule matmul.bias.relu6.test.bf16
+  ENTRY matmul.bias.relu6.test.bf16 {
+    constant.1 = f32[] constant(0)
+    broadcast.2 = f32[1024,256] broadcast(constant.1), dimensions={}
+    arg0.3 = f32[1024,512] parameter(0)
+    convert.4 = bf16[1024,512] convert(arg0.3)
+    arg1.5 = f32[256,512] parameter(1)
+    convert.6 = bf16[256,512] convert(arg1.5)
+    dot.7 = bf16[1024,256] dot(convert.4, convert.6), lhs_contracting_dims={1}, rhs_contracting_dims={1}
+    convert.8 = f32[1024,256] convert(dot.7)
+    arg2.9 = f32[256] parameter(2)
+    broadcast.10 = f32[1024,256] broadcast(arg2.9), dimensions={1}
+    add.11 = f32[1024,256] add(convert.8, broadcast.10)
+    constant.12 = f32[] constant(6)
+    broadcast.13 = f32[1024,256] broadcast(constant.12), dimensions={}
+    ROOT clamp.14 = f32[1024,256] clamp(broadcast.2, add.11, broadcast.13)
+  })";
+
+  EXPECT_TRUE(RunAndCompare(matmul_module_str, ErrorSpec{1e-2, 1e-2}));
+  MatchOptimizedHlo(matmul_module_str, fused_matmul_bias_relu6_rewrite_str_);
+}
+
+// Test MM + BiasAdd + Relu6 fusion : F16
+TEST_F(MatmulTest, BiasAddRelu6Fusion_F16) {
+  if (!IsSupportedType(PrimitiveType::F16)) {
+    GTEST_SKIP() << "CPU does not support F16.";
+  }
+  const char* matmul_module_str = R"(
+  HloModule matmul.bias.relu6.test.f16
+  ENTRY matmul.bias.relu6.test.f16 {
+    constant.1 = f16[] constant(0)
+    broadcast.2 = f16[1024,1024] broadcast(constant.1), dimensions={}
+    arg0.3 = f16[1024,1024] parameter(0)
+    arg1.4 = f16[1024,1024] parameter(1)
+    dot.5 = f16[1024,1024] dot(arg1.4, arg0.3), lhs_contracting_dims={1}, rhs_contracting_dims={0}
+    arg2.6 = f16[1024] parameter(2)
+    broadcast.7 = f16[1024,1024] broadcast(arg2.6), dimensions={1}
+    add.8 = f16[1024,1024] add(dot.5, broadcast.7)
+    constant.9 = f16[] constant(6)
+    broadcast.10 = f16[1024,1024] broadcast(constant.9), dimensions={}
+    ROOT clamp.11 = f16[1024,1024] clamp(broadcast.2, add.8, broadcast.10)
+  })";
+
+  EXPECT_TRUE(RunAndCompare(matmul_module_str, ErrorSpec{1e-4, 1e-4}));
+  MatchOptimizedHlo(matmul_module_str, fused_matmul_bias_relu6_rewrite_str_);
+}
+
 TEST_F(MatmulTest, SimpleTestBF16WithMulAndAddFusion) {
   if (!IsSupportedType(PrimitiveType::BF16)) {
     GTEST_SKIP() << "CPU does not support BF16.";
   }

From 99ddf8b8239d4955de73917d5cfa179e648628a2 Mon Sep 17 00:00:00 2001
From: Mohammed Anany
Date: Wed, 15 May 2024 04:31:17 -0700
Subject: [PATCH 173/478] [Triton] Bring back a test to show that TF32 is not supported for 8-bit or less types with F32.
PiperOrigin-RevId: 633894011 --- .../xla/service/gpu/ir_emitter_triton_test.cc | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/third_party/xla/xla/service/gpu/ir_emitter_triton_test.cc b/third_party/xla/xla/service/gpu/ir_emitter_triton_test.cc index 972b5cb57c8b6c..a9a0ed4ba69e9d 100644 --- a/third_party/xla/xla/service/gpu/ir_emitter_triton_test.cc +++ b/third_party/xla/xla/service/gpu/ir_emitter_triton_test.cc @@ -5204,6 +5204,46 @@ ENTRY entry { "Triton support is only enabled for Ampere GPUs and up."))); } +// This test could be modified to allow TF32 once this bug is fixed. +// TODO(b/320659359) Allow TF32 for 8-bit or less types with F32. +TEST_F(TritonFilecheckTest, NoTF32For8BitOrLessWithF32) { + const std::string hlo_text = R"( +HloModule t + +triton_dot { + parameter_0 = s32[11,24]{1,0} parameter(0) + broadcast.1747 = s32[11,24,128]{2,1,0} broadcast(parameter_0), + dimensions={0,1} parameter_1 = s32[11,24,128]{2,1,0} parameter(1) + compare.49 = pred[11,24,128]{2,1,0} compare(broadcast.1747, parameter_1), + direction=EQ bitcast.4717 = pred[264,128]{1,0} bitcast(compare.49) + convert.142 = f32[264,128]{1,0} convert(bitcast.4717) + parameter_2 = f32[128,8]{1,0} parameter(2) + ROOT dot.381 = f32[264,8]{1,0} dot(convert.142, parameter_2), + lhs_contracting_dims={1}, rhs_contracting_dims={0} +} + +ENTRY e { + p0 = s32[11,24]{1,0} parameter(0) + p1 = s32[11,24,128]{2,1,0} parameter(1) + p2 = f32[128,8]{1,0} parameter(2) + ROOT _ = f32[264,8] fusion(p0, p1, p2), kind=kCustom, calls=triton_dot, + backend_config={"fusion_backend_config": {kind: "__triton_gemm", + triton_gemm_config: + {"block_m":32,"block_n":16,"block_k":128, + "split_k":1,"num_stages":1,"num_warps":4, + "num_ctas":1}}} +})"; + + TritonGemmConfig config(32, 16, 128, 1, 1, 4); + ASSERT_OK( + CreateTritonIrAndFileCheck(hlo_text, config, EmitMatMul, "triton_dot", R"( +CHECK: tt.dot +CHECK-NOT: inputPrecision = tf32 + )")); + + EXPECT_TRUE(RunAndCompare(hlo_text, ErrorSpec{/*aabs=*/1e-3, /*arel=*/1e-3})); +} + } // namespace } // namespace gpu } // namespace xla From d1d3cc601529cf4a4a580c42eacf951adfd3ba8b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 May 2024 05:14:14 -0700 Subject: [PATCH 174/478] Automated Code Change PiperOrigin-RevId: 633909924 --- .../lite/tools/evaluation/stages/utils/image_metrics_test.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/lite/tools/evaluation/stages/utils/image_metrics_test.cc b/tensorflow/lite/tools/evaluation/stages/utils/image_metrics_test.cc index 11d71d663fff22..5e95165081781c 100644 --- a/tensorflow/lite/tools/evaluation/stages/utils/image_metrics_test.cc +++ b/tensorflow/lite/tools/evaluation/stages/utils/image_metrics_test.cc @@ -20,7 +20,6 @@ limitations under the License. #include #include -#include #include namespace tflite { From 6895571f33ff1dc7275ca6abe211f559766b1bb7 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 May 2024 05:17:36 -0700 Subject: [PATCH 175/478] Update ops-related pbtxt files. 
PiperOrigin-RevId: 633910995
---
 tensorflow/core/ops/ops.pbtxt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 2073f2e00ecd81..0751aea36d13e7 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -1,4 +1,4 @@
-go/debugonly
+go/debugonly
 op {
   name: "Abort"
   attr {

From 3c7d9c657e9f0e122cda70b178f56ef29aedf71d Mon Sep 17 00:00:00 2001
From: Johannes Reifferscheid
Date: Wed, 15 May 2024 05:57:51 -0700
Subject: [PATCH 176/478] Revive MLIR interpreter.

This is needed for debugging MLIR emitters. So far, the new GPU ops aren't
supported.

This just reverts the deletion, with a few changes:

- removal of deprecated dialects (gml_st, thlo, lhlo, deallocation, xla CPU,
  xla rt)
- adjustments for upstream changes (expand_shape, vector combiners,
  scf.parallel)
- removal of mhlo ComputeReshapeShape
- replacement of tsl::Status with absl::Status
- move it out of mlir_hlo and adjust to TF style guide (...mostly)

The tooling still needs to be hooked up to the GPU compiler.

Reverts changelist 568496965

PiperOrigin-RevId: 633922803
---
 .../xla/xla/mlir/tools/mlir_bisect/BUILD      |  66 ++
 .../xla/xla/mlir/tools/mlir_bisect/README.md  |  85 ++
 .../xla/mlir/tools/mlir_bisect/bisect_lib.cc  |  87 ++
 .../xla/mlir/tools/mlir_bisect/bisect_lib.h   |  96 ++
 .../xla/mlir/tools/mlir_bisect/mlir_bisect.cc | 360 +++++++
 .../xla/mlir/tools/mlir_bisect/rewrites/BUILD |  26 +
 .../mlir/tools/mlir_bisect/rewrites/func.cc   |  86 ++
 .../tools/mlir_bisect/rewrites/general.cc     | 194 ++++
 .../mlir/tools/mlir_bisect/rewrites/scf.cc    | 144 +++
 .../tools/mlir_bisect/rewrites/tests/BUILD    |  13 +
 .../tests/erase-op-without-results.mlir       |  12 +
 .../rewrites/tests/inline-scf-while.mlir      |  40 +
 .../tests/reduce-scf-forall-bounds.mlir       |  16 +
 .../tests/replace-op-with-constant.mlir       |  26 +
 .../rewrites/tests/replace-op-with-value.mlir |  16 +
 .../tests/replace-operand-with-constant.mlir  |  28 +
 ...eturn-operands-of-terminator-operands.mlir |  15 +
 .../rewrites/tests/truncate-function.mlir     |  31 +
 .../xla/mlir/tools/mlir_bisect/test_passes.cc |  48 +
 .../xla/mlir/tools/mlir_bisect/test_passes.h  |  29 +
 .../xla/mlir/tools/mlir_bisect/tests/BUILD    |  17 +
 .../mlir/tools/mlir_bisect/tests/bisect.mlir  |  46 +
 .../mlir/tools/mlir_bisect/tests/no-bug.mlir  |  10 +
 .../tools/mlir_bisect/tests/snapshot.mlir     |  12 +
 .../tools/mlir_bisect/tests/snapshot.mlir.pb  | Bin 0 -> 68 bytes
.../dialects/tests/arith/bitcast.mlir | 21 + .../dialects/tests/arith/cmpf.mlir | 129 +++ .../dialects/tests/arith/cmpi.mlir | 147 +++ .../dialects/tests/arith/constant.mlir | 37 + .../dialects/tests/arith/extf.mlir | 11 + .../dialects/tests/arith/fptosi.mlir | 21 + .../dialects/tests/arith/index_cast.mlir | 28 + .../dialects/tests/arith/int_math.mlir | 111 +++ .../dialects/tests/arith/minmax.mlir | 25 + .../dialects/tests/arith/negf.mlir | 21 + .../dialects/tests/arith/remf.mlir | 12 + .../dialects/tests/arith/select.mlir | 52 ++ .../dialects/tests/arith/sitofp.mlir | 31 + .../dialects/tests/arith/uitofp.mlir | 31 + .../dialects/tests/arith/vector_math.mlir | 12 + .../tests/bufferization/alloc_tensor.mlir | 30 + .../dialects/tests/bufferization/clone.mlir | 14 + .../tests/bufferization/to_memref.mlir | 10 + .../tests/bufferization/to_tensor.mlir | 11 + .../builtin/unrealized_conversion_cast.mlir | 21 + .../dialects/tests/complex/complex.mlir | 186 ++++ .../dialects/tests/func/call.mlir | 48 + .../dialects/tests/linalg/broadcast.mlir | 30 + .../dialects/tests/linalg/dot.mlir | 14 + .../dialects/tests/linalg/fill.mlir | 24 + .../dialects/tests/linalg/generic.mlir | 113 +++ .../dialects/tests/linalg/map.mlir | 74 ++ .../dialects/tests/linalg/matmul.mlir | 41 + .../dialects/tests/linalg/reduce.mlir | 57 ++ .../dialects/tests/linalg/transpose.mlir | 27 + .../dialects/tests/linalg/vecmat.mlir | 14 + .../dialects/tests/math/math.mlir | 252 +++++ .../dialects/tests/memref/alloc.mlir | 57 ++ .../dialects/tests/memref/collapse_shape.mlir | 33 + .../dialects/tests/memref/copy.mlir | 39 + .../dialects/tests/memref/dim.mlir | 12 + .../dialects/tests/memref/expand_shape.mlir | 52 ++ .../dialects/tests/memref/get_global.mlir | 12 + .../dialects/tests/memref/invalid.mlir | 77 ++ .../dialects/tests/memref/load.mlir | 12 + .../dialects/tests/memref/subview.mlir | 120 +++ .../dialects/tests/mhlo/bitcast_convert.mlir | 11 + .../dialects/tests/mhlo/broadcast_in_dim.mlir | 20 + .../dialects/tests/mhlo/case.mlir | 17 + .../dialects/tests/mhlo/clamp.mlir | 27 + .../dialects/tests/mhlo/compare.mlir | 143 +++ .../dialects/tests/mhlo/complex_math.mlir | 100 ++ .../dialects/tests/mhlo/concatenate.mlir | 37 + .../dialects/tests/mhlo/constant.mlir | 25 + .../dialects/tests/mhlo/convert.mlir | 21 + .../dialects/tests/mhlo/dot.mlir | 37 + .../dialects/tests/mhlo/dot_general.mlir | 73 ++ .../dialects/tests/mhlo/dynamic_slice.mlir | 32 + .../tests/mhlo/dynamic_update_slice.mlir | 34 + .../dialects/tests/mhlo/float_math.mlir | 199 ++++ .../dialects/tests/mhlo/gather.mlir | 58 ++ .../dialects/tests/mhlo/int_math.mlir | 358 +++++++ .../dialects/tests/mhlo/iota.mlir | 30 + .../dialects/tests/mhlo/pad.mlir | 56 ++ .../dialects/tests/mhlo/reduce.mlir | 17 + .../dialects/tests/mhlo/reshape.mlir | 34 + .../dialects/tests/mhlo/scatter.mlir | 55 ++ .../dialects/tests/mhlo/select.mlir | 14 + .../dialects/tests/mhlo/slice.mlir | 16 + .../dialects/tests/mhlo/sort.mlir | 25 + .../dialects/tests/mhlo/subtract.mlir | 10 + .../dialects/tests/mhlo/transpose.mlir | 28 + .../dialects/tests/mhlo/tuple.mlir | 30 + .../dialects/tests/mhlo/while.mlir | 25 + .../dialects/tests/scf/for.mlir | 82 ++ .../dialects/tests/scf/forall.mlir | 62 ++ .../dialects/tests/scf/if.mlir | 69 ++ .../dialects/tests/scf/parallel.mlir | 42 + .../dialects/tests/scf/while.mlir | 45 + .../dialects/tests/tensor/collapse_shape.mlir | 42 + .../dialects/tests/tensor/dim.mlir | 12 + .../dialects/tests/tensor/empty.mlir | 21 + .../dialects/tests/tensor/expand_shape.mlir | 31 
+ .../dialects/tests/tensor/extract.mlir | 13 + .../dialects/tests/tensor/extract_slice.mlir | 62 ++ .../dialects/tests/tensor/from_elements.mlir | 25 + .../dialects/tests/tensor/generate.mlir | 29 + .../dialects/tests/tensor/insert.mlir | 14 + .../dialects/tests/tensor/insert_slice.mlir | 25 + .../dialects/tests/tensor/pad.mlir | 38 + .../dialects/tests/vector/bitcast.mlir | 32 + .../dialects/tests/vector/broadcast.mlir | 51 + .../dialects/tests/vector/compressstore.mlir | 16 + .../dialects/tests/vector/constant_mask.mlir | 14 + .../dialects/tests/vector/contract.mlir | 141 +++ .../dialects/tests/vector/create_mask.mlir | 16 + .../dialects/tests/vector/expandload.mlir | 19 + .../dialects/tests/vector/extract.mlir | 52 ++ .../tests/vector/extract_strided_slice.mlir | 18 + .../dialects/tests/vector/extractelement.mlir | 22 + .../dialects/tests/vector/flat_transpose.mlir | 23 + .../dialects/tests/vector/fma.mlir | 13 + .../dialects/tests/vector/gather.mlir | 50 + .../dialects/tests/vector/insert.mlir | 57 ++ .../tests/vector/insert_strided_slice.mlir | 17 + .../dialects/tests/vector/insertelement.mlir | 24 + .../dialects/tests/vector/invalid.mlir | 39 + .../dialects/tests/vector/load.mlir | 27 + .../dialects/tests/vector/maskedload.mlir | 19 + .../dialects/tests/vector/maskedstore.mlir | 18 + .../tests/vector/multi_reduction.mlir | 46 + .../dialects/tests/vector/outerproduct.mlir | 155 ++++ .../dialects/tests/vector/reduction.mlir | 235 +++++ .../dialects/tests/vector/shape_cast.mlir | 23 + .../dialects/tests/vector/shuffle.mlir | 34 + .../dialects/tests/vector/splat.mlir | 11 + .../dialects/tests/vector/store.mlir | 39 + .../dialects/tests/vector/transfer_read.mlir | 118 +++ .../dialects/tests/vector/transfer_write.mlir | 91 ++ .../dialects/tests/vector/transpose.mlir | 28 + .../dialects/tests/vector/type_cast.mlir | 11 + .../dialects/tests/vector/vscale.mlir | 12 + .../tools/mlir_interpreter/dialects/util.cc | 180 ++++ .../tools/mlir_interpreter/dialects/util.h | 85 ++ .../tools/mlir_interpreter/dialects/vector.cc | 875 ++++++++++++++++++ .../tools/mlir_interpreter/framework/BUILD | 42 + .../mlir_interpreter/framework/interpreter.cc | 150 +++ .../mlir_interpreter/framework/interpreter.h | 186 ++++ .../framework/interpreter_value.cc | 382 ++++++++ .../framework/interpreter_value.h | 227 +++++ .../framework/interpreter_value_util.h | 190 ++++ .../framework/registration.cc | 122 +++ .../mlir_interpreter/framework/registration.h | 229 +++++ .../framework/tensor_or_memref.cc | 162 ++++ .../framework/tensor_or_memref.h | 364 ++++++++ .../mlir_interpreter/framework/tests/BUILD | 25 + .../framework/tests/interpreter_value_test.cc | 238 +++++ .../framework/tests/tensor_or_memref_test.cc | 104 +++ .../mlir_interpreter_runner.cc | 138 +++ .../xla/xla/mlir/tools/mlir_replay/BUILD | 63 ++ .../xla/xla/mlir/tools/mlir_replay/README.md | 48 + .../xla/mlir/tools/mlir_replay/mlir_replay.cc | 236 +++++ .../mlir/tools/mlir_replay/mlir_replay_lib.cc | 254 +++++ .../mlir/tools/mlir_replay/mlir_replay_lib.h | 40 + .../xla/mlir/tools/mlir_replay/public/BUILD | 56 ++ .../mlir_replay/public/compiler_trace.proto | 31 + .../mlir_replay/public/execution_trace.proto | 72 ++ .../public/execution_trace_utils.cc | 449 +++++++++ .../public/execution_trace_utils.h | 76 ++ .../public/execution_trace_utils_test.cc | 139 +++ 191 files changed, 15743 insertions(+) create mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/BUILD create mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/README.md create mode 100644 
third_party/xla/xla/mlir/tools/mlir_bisect/bisect_lib.cc create mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/bisect_lib.h create mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/mlir_bisect.cc create mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/BUILD create mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/func.cc create mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/general.cc create mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/scf.cc create mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/BUILD create mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/erase-op-without-results.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/inline-scf-while.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/reduce-scf-forall-bounds.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/replace-op-with-constant.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/replace-op-with-value.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/replace-operand-with-constant.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/return-operands-of-terminator-operands.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/truncate-function.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/test_passes.cc create mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/test_passes.h create mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/tests/BUILD create mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/tests/bisect.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/tests/no-bug.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/tests/snapshot.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_bisect/tests/snapshot.mlir.pb create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/BUILD create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/BUILD create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/affine.cc create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/arith.cc create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/bufferization.cc create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/builtin.cc create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/comparators.h create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/complex.cc create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/cwise_math.h create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/func.cc create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/linalg.cc create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/math.cc create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/memref.cc create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/mhlo.cc create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/mhlo_binary_cwise.cc create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/mhlo_unary_cwise.cc create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/scf.cc create mode 100644 
third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tensor.cc create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/BUILD create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/affine/apply.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/affine/minmax.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/arith/bitcast.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/arith/cmpf.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/arith/cmpi.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/arith/constant.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/arith/extf.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/arith/fptosi.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/arith/index_cast.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/arith/int_math.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/arith/minmax.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/arith/negf.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/arith/remf.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/arith/select.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/arith/sitofp.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/arith/uitofp.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/arith/vector_math.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/bufferization/alloc_tensor.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/bufferization/clone.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/bufferization/to_memref.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/bufferization/to_tensor.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/builtin/unrealized_conversion_cast.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/complex/complex.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/func/call.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/linalg/broadcast.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/linalg/dot.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/linalg/fill.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/linalg/generic.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/linalg/map.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/linalg/matmul.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/linalg/reduce.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/linalg/transpose.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/linalg/vecmat.mlir create mode 100644 
third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/math/math.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/memref/alloc.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/memref/collapse_shape.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/memref/copy.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/memref/dim.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/memref/expand_shape.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/memref/get_global.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/memref/invalid.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/memref/load.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/memref/subview.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/mhlo/bitcast_convert.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/mhlo/broadcast_in_dim.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/mhlo/case.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/mhlo/clamp.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/mhlo/compare.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/mhlo/complex_math.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/mhlo/concatenate.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/mhlo/constant.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/mhlo/convert.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/mhlo/dot.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/mhlo/dot_general.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/mhlo/dynamic_slice.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/mhlo/dynamic_update_slice.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/mhlo/float_math.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/mhlo/gather.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/mhlo/int_math.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/mhlo/iota.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/mhlo/pad.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/mhlo/reduce.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/mhlo/reshape.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/mhlo/scatter.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/mhlo/select.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/mhlo/slice.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/mhlo/sort.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/mhlo/subtract.mlir create mode 100644 
third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/mhlo/transpose.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/mhlo/tuple.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/mhlo/while.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/scf/for.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/scf/forall.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/scf/if.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/scf/parallel.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/scf/while.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/tensor/collapse_shape.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/tensor/dim.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/tensor/empty.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/tensor/expand_shape.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/tensor/extract.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/tensor/extract_slice.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/tensor/from_elements.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/tensor/generate.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/tensor/insert.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/tensor/insert_slice.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/tensor/pad.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/vector/bitcast.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/vector/broadcast.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/vector/compressstore.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/vector/constant_mask.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/vector/contract.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/vector/create_mask.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/vector/expandload.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/vector/extract.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/vector/extract_strided_slice.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/vector/extractelement.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/vector/flat_transpose.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/vector/fma.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/vector/gather.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/vector/insert.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/vector/insert_strided_slice.mlir create mode 100644 
third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/vector/insertelement.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/vector/invalid.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/vector/load.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/vector/maskedload.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/vector/maskedstore.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/vector/multi_reduction.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/vector/outerproduct.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/vector/reduction.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/vector/shape_cast.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/vector/shuffle.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/vector/splat.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/vector/store.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/vector/transfer_read.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/vector/transfer_write.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/vector/transpose.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/vector/type_cast.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/tests/vector/vscale.mlir create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/util.cc create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/util.h create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/vector.cc create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/framework/BUILD create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/framework/interpreter.cc create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/framework/interpreter.h create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/framework/interpreter_value.cc create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/framework/interpreter_value.h create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/framework/interpreter_value_util.h create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/framework/registration.cc create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/framework/registration.h create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/framework/tensor_or_memref.cc create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/framework/tensor_or_memref.h create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/framework/tests/BUILD create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/framework/tests/interpreter_value_test.cc create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/framework/tests/tensor_or_memref_test.cc create mode 100644 third_party/xla/xla/mlir/tools/mlir_interpreter/mlir_interpreter_runner.cc create mode 100644 third_party/xla/xla/mlir/tools/mlir_replay/BUILD create mode 100644 third_party/xla/xla/mlir/tools/mlir_replay/README.md create mode 100644 
third_party/xla/xla/mlir/tools/mlir_replay/mlir_replay.cc create mode 100644 third_party/xla/xla/mlir/tools/mlir_replay/mlir_replay_lib.cc create mode 100644 third_party/xla/xla/mlir/tools/mlir_replay/mlir_replay_lib.h create mode 100644 third_party/xla/xla/mlir/tools/mlir_replay/public/BUILD create mode 100644 third_party/xla/xla/mlir/tools/mlir_replay/public/compiler_trace.proto create mode 100644 third_party/xla/xla/mlir/tools/mlir_replay/public/execution_trace.proto create mode 100644 third_party/xla/xla/mlir/tools/mlir_replay/public/execution_trace_utils.cc create mode 100644 third_party/xla/xla/mlir/tools/mlir_replay/public/execution_trace_utils.h create mode 100644 third_party/xla/xla/mlir/tools/mlir_replay/public/execution_trace_utils_test.cc diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/BUILD b/third_party/xla/xla/mlir/tools/mlir_bisect/BUILD new file mode 100644 index 00000000000000..c394e91ad24568 --- /dev/null +++ b/third_party/xla/xla/mlir/tools/mlir_bisect/BUILD @@ -0,0 +1,66 @@ +load("@bazel_skylib//rules:build_test.bzl", "build_test") +load("@local_tsl//tsl/platform:rules_cc.bzl", "cc_library") +load("//xla:xla.bzl", "xla_cc_binary") + +# copybara:uncomment package(default_applicable_licenses = ["//tensorflow:license"]) + +build_test( + name = "mlir-bisect_build_test", + targets = [ + ":mlir-bisect", + ], +) + +xla_cc_binary( + name = "mlir-bisect", + testonly = True, + srcs = ["mlir_bisect.cc"], + visibility = ["//visibility:public"], + deps = [ + ":bisect_lib", + "//xla:literal", + "//xla/mlir/tools/mlir_bisect/rewrites", + "//xla/mlir/tools/mlir_interpreter/dialects", + "//xla/mlir/tools/mlir_interpreter/framework", + "//xla/mlir/tools/mlir_replay/public:execution_trace_utils", + "//xla/mlir_hlo:hlo_dialect_registration", + "//xla/mlir_hlo:mhlo_passes", + "//xla/service:hlo_proto_cc", + "@com_google_absl//absl/log:check", + "@llvm-project//llvm:Support", + "@llvm-project//mlir:AffineUtils", + "@llvm-project//mlir:AllPassesAndDialects", + "@llvm-project//mlir:ArithDialect", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:ParseUtilities", + "@llvm-project//mlir:Parser", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:Support", + "@llvm-project//mlir:Transforms", + "@local_tsl//tsl/platform:env", + "@local_tsl//tsl/platform:platform_port", + ], +) + +cc_library( + name = "bisect_lib", + srcs = [ + "bisect_lib.cc", + "test_passes.cc", + ], + hdrs = [ + "bisect_lib.h", + "test_passes.h", + ], + visibility = ["//visibility:public"], + deps = [ + "//xla/mlir/tools/mlir_replay/public:execution_trace_proto_cc", + "//xla/mlir/tools/mlir_replay/public:execution_trace_proto_cc_impl", + "//xla/mlir/tools/mlir_replay/public:execution_trace_utils", + "@llvm-project//llvm:Support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:LinalgDialect", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:Support", + ], +) diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/README.md b/third_party/xla/xla/mlir/tools/mlir_bisect/README.md new file mode 100644 index 00000000000000..570e92b6e53802 --- /dev/null +++ b/third_party/xla/xla/mlir/tools/mlir_bisect/README.md @@ -0,0 +1,85 @@ +# MLIR HLO mlir_bisect + +This is a test case reduction tool, similar in purpose to `mlir-reduce`, but +specific to the `mlir-interpreter` infrastructure. In particular, reductions can +depend on concrete values encountered during execution, and reductions can (and +usually do) generate multiple candidates. 
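+
+Strategies return lazily-invoked candidate factories rather than eagerly
+cloned modules, so a module is only cloned when a candidate is actually
+tried. A condensed sketch of the pattern (a hypothetical `EraseIfUnused`,
+modeled on the real `EraseOpWithoutResults` strategy in
+`rewrites/general.cc`; `CloneModuleFor` is the helper from `bisect_lib.h`):
+
+```
+SmallVector<std::function<OwningOpRef<ModuleOp>()>> EraseIfUnused(
+    BisectState&, Operation* op) {
+  SmallVector<std::function<OwningOpRef<ModuleOp>()>> candidates;
+  // Only ops whose results are all unused can be erased.
+  if (op->getNumResults() > 0 && !op->use_empty()) return candidates;
+  candidates.push_back([op]() {
+    // Clone the whole module, find the op in the clone, erase it there.
+    auto [module, cloned_op] = CloneModuleFor(op);
+    cloned_op->erase();
+    return std::move(module);
+  });
+  return candidates;
+}
+```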
+ +For example, the `ReplaceOpWithConstant` reduction will attempt to replace each +op with each of its results. If the op is in a loop, each execution will be a +candidate for replacement. + +## Using this tool + +1. Run a JAX test with snapshots enabled: + + ``` + bazel test some-jax-test + --test_env=XLA_FLAGS="--xla_cpu_use_xla_runtime --xla_dump_to=/tmp/dump + --xla_dump_hlo_snapshots" --test_filter=SomeSpecific.Test + --test_sharding_strategy=disabled --test_strategy=local + ``` + +1. Figure out the culprit module and pass (sorry, no automation yet): + + ``` + bazel run tensorflow/compiler/xla/mlir/tools/mlir_replay:mlir_replay -- \ + --mlir-compilation-trace=/tmp/dump/module_0000.jit__something.mlir-trace.pb \ + --hlo-snapshot=/tmp/dump/module_0000.jit__something.snapshot.0.pb \ + --print-changes-only \ + --execution-trace-dir=/tmp/execution + ``` + + You should see a pass after which results change. You'll want to use the + .mlir file in `/tmp/execution` corresponding to the pass *before* that with + the bisect tool. + + Note: If the failing pass is bufferization, you may have to use an earlier + snapshot, e.g. before EmptyTensorToAllocTensor. +1. Run bisect: + + ``` + bazel run tensorflow/compiler/xla/mlir/tools/mlir_bisect:mlir-bisect -- \ + --hlo-snapshot=/tmp/dump/module_0000.jit_something.snapshot.0.pb \ + --pass-pipeline="builtin.module(empty-tensor-to-alloc-tensor,one-shot-bufferize{allow-return-allocs bufferize-function-boundaries create-deallocs=0})" \ + /tmp/execution/0052.ScalarizationPass.mlir + ``` + +## Adding a reduction + +To add a reduction, create a function that generates the candidates and register +it: + +``` +SmallVector> +FrobulateAndDefenestrate(BisectState&, dialect::SomeOp some_op) { + auto [cloned_module_1, cloned_op_1] = CloneModuleFor(some_op); + Frobulate(cloned_op_1); + + auto [cloned_module_2, cloned_op_2] = CloneModuleFor(some_op); + Defenestrate(cloned_op_2); + + return {cloned_module_1, cloned_module_2}; +} + +REGISTER_MLIR_REDUCE_STRATEGY(FrobulateAndDefenestrate); +``` + +Then, add a test for the strategy. Make sure your strategy is linked into +mlir-bisect and has `alwayslink` set. + +``` +// RUN: mlir-bisect %s --debug-strategy=FrobulateAndDefenestrate | FileCheck %s + +func.func @main() { + dialect.some_op() +} + +// CHECK: func @main() +// CHECK-NEXT: frobulated + +// CHECK: func @main() +// CHECK-NEXT: defenestrated +``` + +`--debug-strategy` will print all candidates generated by the given strategy. diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/bisect_lib.cc b/third_party/xla/xla/mlir/tools/mlir_bisect/bisect_lib.cc new file mode 100644 index 00000000000000..c76e226ff7a6fa --- /dev/null +++ b/third_party/xla/xla/mlir/tools/mlir_bisect/bisect_lib.cc @@ -0,0 +1,87 @@ +/* Copyright 2023 The OpenXLA Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "xla/mlir/tools/mlir_bisect/bisect_lib.h" + +#include +#include +#include +#include + +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "mlir/IR/BuiltinOps.h" // from @llvm-project +#include "mlir/IR/Operation.h" // from @llvm-project +#include "mlir/IR/OwningOpRef.h" // from @llvm-project +#include "mlir/Support/LLVM.h" // from @llvm-project + +namespace mlir { +namespace bisect { + +Operation* FindInClone(Operation* op, ModuleOp clone) { + if (llvm::isa(op)) { + return clone; + } + + auto* parent_clone = FindInClone(op->getParentOp(), clone); + auto cloned_ops = + parent_clone->getRegions()[op->getParentRegion()->getRegionNumber()] + .getOps(); + for (auto [original_op, cloned_op] : + llvm::zip(op->getParentRegion()->getOps(), cloned_ops)) { + if (&original_op == op) { + return &cloned_op; + } + } + + llvm_unreachable("Op not found in clone."); +} + +std::pair, Operation*> CloneModuleFor(Operation* op) { + auto module = op->getParentOfType().clone(); + return {OwningOpRef{module}, FindInClone(op, module)}; +} + +namespace detail { + +DenseMap>& +GetStrategies() { + static auto* strategies = + new DenseMap>(); + return *strategies; +} + +void RegisterReduceStrategy( + StringRef name, + std::function fn) { + GetStrategies()[name] = std::move(fn); +} + +CandidateVector GetCandidates( + const std::function& strategy, + BisectState& state, ModuleOp op) { + assert(strategy && "GetCandidates was passed a null strategy"); + CandidateVector result; + op.lookupSymbol("main")->walk([&](Operation* subOp) { + llvm::move(strategy(state, subOp), std::back_inserter(result)); + }); + return result; +} + +} // namespace detail +} // namespace bisect +} // namespace mlir diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/bisect_lib.h b/third_party/xla/xla/mlir/tools/mlir_bisect/bisect_lib.h new file mode 100644 index 00000000000000..a4784263313de5 --- /dev/null +++ b/third_party/xla/xla/mlir/tools/mlir_bisect/bisect_lib.h @@ -0,0 +1,96 @@ +/* Copyright 2023 The OpenXLA Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef XLA_MLIR_TOOLS_MLIR_BISECT_BISECT_LIB_H_ +#define XLA_MLIR_TOOLS_MLIR_BISECT_BISECT_LIB_H_ + +#include +#include +#include + +#include "mlir/IR/BuiltinOps.h" // from @llvm-project +#include "mlir/Support/LLVM.h" // from @llvm-project +#include "xla/mlir/tools/mlir_replay/public/execution_trace.pb.h" +#include "xla/mlir/tools/mlir_replay/public/execution_trace_utils.h" + +#define REGISTER_MLIR_REDUCE_STRATEGY(name) \ + static int name##_init = []() { \ + ::mlir::bisect::detail::RegisterReduceStrategy(#name, name); \ + return 1; \ + }(); + +namespace mlir { +namespace bisect { + +class BisectState { + public: + void SetTrace(mlir::interpreter::ExecutionTrace trace) { + trace_ = std::move(trace); + } + + // Returns all executions of the given op. + llvm::SmallVector GetExecutions( + mlir::Operation* op) const { + return interpreter::FindOpExecutionsInTrace(trace_, op); + } + + private: + mlir::interpreter::ExecutionTrace trace_; +}; + +std::pair, Operation*> CloneModuleFor(Operation* op); +Operation* FindInClone(Operation* op, ModuleOp clone); + +template +std::pair, Op> CloneModuleFor(Op op) { + auto [module, op_clone] = CloneModuleFor(op.getOperation()); + return {std::move(module), llvm::cast(op_clone)}; +} + +namespace detail { + +using CandidateVector = SmallVector()>>; + +CandidateVector GetCandidates( + const std::function& strategy, + BisectState& state, ModuleOp op); + +DenseMap>& +GetStrategies(); + +// Registers a strategy that applies to all ops. +void RegisterReduceStrategy( + StringRef name, + std::function fn); + +// Registers a strategy that applies to specific ops. +template +void RegisterReduceStrategy(StringRef name, + CandidateVector (*fn)(BisectState&, Op)) { + RegisterReduceStrategy( + name, [fn](BisectState& state, Operation* op) -> CandidateVector { + if (auto cast = llvm::dyn_cast(op)) { + return fn(state, cast); + } + return {}; + }); +} + +} // namespace detail + +} // namespace bisect +} // namespace mlir + +#endif // XLA_MLIR_TOOLS_MLIR_BISECT_BISECT_LIB_H_ diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/mlir_bisect.cc b/third_party/xla/xla/mlir/tools/mlir_bisect/mlir_bisect.cc new file mode 100644 index 00000000000000..ca1a699444b452 --- /dev/null +++ b/third_party/xla/xla/mlir/tools/mlir_bisect/mlir_bisect.cc @@ -0,0 +1,360 @@ +/* Copyright 2023 The OpenXLA Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include +#include +#include +#include +#include +#include +#include + +#include "absl/log/check.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" +#include "mlir/Dialect/Affine/Utils.h" // from @llvm-project +#include "mlir/Dialect/Arith/IR/Arith.h" // from @llvm-project +#include "mlir/IR/Builders.h" // from @llvm-project +#include "mlir/IR/BuiltinAttributeInterfaces.h" // from @llvm-project +#include "mlir/IR/BuiltinOps.h" // from @llvm-project +#include "mlir/IR/DialectRegistry.h" // from @llvm-project +#include "mlir/IR/MLIRContext.h" // from @llvm-project +#include "mlir/IR/OwningOpRef.h" // from @llvm-project +#include "mlir/IR/SymbolTable.h" // from @llvm-project +#include "mlir/IR/Verifier.h" // from @llvm-project +#include "mlir/InitAllDialects.h" // from @llvm-project +#include "mlir/InitAllPasses.h" // from @llvm-project +#include "mlir/Parser/Parser.h" // from @llvm-project +#include "mlir/Pass/PassManager.h" // from @llvm-project +#include "mlir/Pass/PassRegistry.h" // from @llvm-project +#include "mlir/Support/FileUtilities.h" // from @llvm-project +#include "mlir/Support/LLVM.h" // from @llvm-project +#include "mlir/Support/LogicalResult.h" // from @llvm-project +#include "mlir/Tools/ParseUtilities.h" // from @llvm-project +#include "mlir/Transforms/Passes.h" // from @llvm-project +#include "xla/literal.h" +#include "xla/mlir/tools/mlir_bisect/bisect_lib.h" +#include "xla/mlir/tools/mlir_bisect/test_passes.h" +#include "xla/mlir/tools/mlir_interpreter/framework/interpreter.h" +#include "xla/mlir/tools/mlir_replay/public/execution_trace_utils.h" +#include "xla/mlir_hlo/mhlo/IR/register.h" +#include "xla/mlir_hlo/mhlo/transforms/passes.h" +#include "xla/service/hlo.pb.h" +#include "tsl/platform/env.h" +#include "tsl/platform/init_main.h" + +struct Options { + llvm::cl::opt input_filename{llvm::cl::Positional, + llvm::cl::desc(""), + llvm::cl::init("-")}; + llvm::cl::opt hlo_snapshot{ + "hlo-snapshot", + llvm::cl::desc( + "If set, get argument values from the given snapshot. If not set, " + "the input function must not have any arguments."), + llvm::cl::init("")}; + llvm::cl::opt debug_strategy{ + "debug-strategy", + llvm::cl::desc("If set, print all reductions for the given strategy and " + "exit. For testing."), + llvm::cl::init("")}; + llvm::cl::opt expected_error{ + "expected-error", + llvm::cl::desc("If set, expect the given error message after applying " + "the pass instead of a successful execution."), + llvm::cl::init("")}; + llvm::cl::opt max_steps_per_run{ + "max-steps-per-run", + llvm::cl::desc("Maximum number of steps to execute for each attempt."), + llvm::cl::init(100000)}; + mlir::PassPipelineCLParser pass_pipeline{"", "Passes to run"}; + llvm::cl::opt canonicalize{ + "enable-canonicalization", + llvm::cl::desc("If set, canonicalize candidates before trying them. 
Set " + "to false if you're bisecting --canonicalize."), + llvm::cl::init(true)}; +}; + +namespace mlir { +namespace bisect { +namespace { + +OwningOpRef ParseMlirInput(llvm::StringRef inputFilename, + MLIRContext* context) { + std::string error_message; + auto file = mlir::openInputFile(inputFilename, &error_message); + if (!file) { + llvm::errs() << error_message << "\n"; + return {}; + } + + auto source_mgr = std::make_shared(); + source_mgr->AddNewSourceBuffer(std::move(file), SMLoc()); + return parseSourceFile(source_mgr, context); +} + +LogicalResult RunPipeline(ModuleOp module, const Options& options) { + if (!options.pass_pipeline.hasAnyOccurrences()) { + return mlir::success(); + } + + auto error_handler = [&](const llvm::Twine& msg) { + llvm::errs() << msg << "\n"; + return failure(); + }; + PassManager pm(module.getContext()); + if (failed(options.pass_pipeline.addToPipeline(pm, error_handler)) || + failed(pm.run(module))) { + llvm::errs() << "pipeline failed\n"; + return failure(); + } + return success(); +} + +LogicalResult Run(mlir::Operation* module, interpreter::ExecutionTrace* trace, + const Options& options) { + SymbolTable symbol_table{module}; + interpreter::ExecutionTraceListener tracer(trace); + interpreter::InterpreterOptions interpreter_options; + interpreter_options.listener = &tracer; + interpreter_options.max_steps = options.max_steps_per_run; + auto results_before_pass = interpreter::RunInterpreter( + symbol_table, llvm::cast(symbol_table.lookup("main")), {}, + interpreter_options); + + if (!results_before_pass.ok()) { + llvm::errs() << "Interpreter failed\n"; + return failure(); + } + + if (!options.debug_strategy.empty()) { + return success(); + } + + OwningOpRef clone(llvm::cast(module).clone()); + if (!succeeded(RunPipeline(*clone, options))) { + return failure(); + } + + SymbolTable symbol_table_after{*clone}; + interpreter_options.listener = nullptr; + bool found_expected_error = false; + if (!options.expected_error.empty()) { + auto original_handler = std::move(interpreter_options.error_handler); + interpreter_options.error_handler = [&](llvm::StringRef failure) { + found_expected_error |= + failure.find(options.expected_error) != std::string::npos; + original_handler(failure); + }; + } + + auto results_after_pass = interpreter::RunInterpreter( + symbol_table_after, + llvm::cast(symbol_table_after.lookup("main")), {}, + std::move(interpreter_options)); + + if (!results_after_pass.ok()) { + if (found_expected_error) { + return success(); + } + llvm::errs() << "Interpreter failed\n"; + return failure(); + } else if (!options.expected_error.empty()) { + llvm::errs() << "Expected error not seen\n"; + return failure(); + } + + // If the results are the same, the bug is no longer present. 
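+  // Note the inverted polarity relative to normal pass semantics: here,
+  // success() means "the bug still reproduces" (the results differ, or the
+  // expected error was observed), while failure() means the candidate no
+  // longer exhibits the bug. ReduceModule keys off this to decide whether a
+  // candidate is kept.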
+ if (*results_before_pass == *results_after_pass) { + return failure(); + } + + llvm::errs() << "results before:\n"; + for (auto& result : *results_before_pass) { + llvm::errs() << " " << result.ToString() << "\n"; + } + llvm::errs() << "\nresults after:\n"; + for (auto& result : *results_after_pass) { + llvm::errs() << " " << result.ToString() << "\n"; + } + + return success(); +} + +LogicalResult Canonicalize(ModuleOp module) { + PassManager pm(module.getContext()); + pm.addPass(createCanonicalizerPass()); + return pm.run(module.getOperation()); +} + +OwningOpRef ReduceModule(OwningOpRef module, + BisectState& state, const Options& options) { + auto strategies = llvm::to_vector(mlir::bisect::detail::GetStrategies()); + + auto apply_step = [&]() -> std::optional> { + for (auto it = strategies.begin(); it != strategies.end(); ++it) { + for (auto& candidate_fn : + detail::GetCandidates(it->second, state, *module)) { + auto candidate = candidate_fn(); + if (!candidate || !mlir::verify(*candidate).succeeded()) { + continue; + } + if (options.canonicalize && !Canonicalize(*candidate).succeeded()) { + continue; + } + + interpreter::ExecutionTrace trace; + // Verify that the candidate is still buggy. + if (!Run(*candidate, &trace, options).succeeded()) { + continue; + } + + // Print the new buggy module. + llvm::outs() << "module after " << it->first << ":\n" + << *candidate << "\n\n"; + + // Update the trace. + state.SetTrace(std::move(trace)); + + // Move strategies to the end. + decltype(strategies) new_strategies; + std::copy(it + 1, strategies.end(), std::back_inserter(new_strategies)); + std::copy(strategies.begin(), it + 1, + std::back_inserter(new_strategies)); + strategies = std::move(new_strategies); + return {candidate.release()}; + } + } + return std::nullopt; + }; + + while (auto new_module = apply_step()) { + module = std::move(*new_module); + } + return module; +} + +void ReplaceArgsWithConstants(ModuleOp module, + const xla::HloSnapshot& snapshot) { + auto main = llvm::cast(module.lookupSymbol("main")); + OpBuilder b(main.getBody()); + for (auto [arg, bbarg] : + llvm::zip(snapshot.arguments(), main.getBody().getArguments())) { + auto attr = interpreter::ValueToAttribute( + *interpreter::LiteralToValue(*xla::Literal::CreateFromProto(arg)), + bbarg.getType()); + CHECK_EQ(attr.size(), 1) << "unsupported argument"; + + auto constant = b.create( + main.getLoc(), bbarg.getType(), llvm::cast(attr.front())); + bbarg.replaceAllUsesWith(constant); + } + + // The remaining ops are output args, so we replace them with allocs. 
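+  // "Remaining" refers to the trailing block arguments past the recorded
+  // snapshot inputs. These are assumed to be destination buffers (after
+  // bufferization, entry functions typically receive output memrefs as
+  // extra arguments), so each one is backed by a fresh memref.alloc; the
+  // CHECK below rejects any non-memref leftover argument.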
+ for (auto arg : + main.getBody().getArguments().drop_front(snapshot.arguments().size())) { + CHECK(llvm::isa(arg.getType())) << "unsupported argument"; + arg.replaceAllUsesWith(b.create( + module.getLoc(), llvm::cast(arg.getType()))); + } + while (main.getBody().getNumArguments() > 0) { + main.getBody().eraseArgument(0); + } + main.setFunctionType(FunctionType::get(main.getContext(), /*inputs=*/{}, + main.getFunctionType().getResults())); + main.setArgAttrsAttr(b.getArrayAttr({})); +} + +} // namespace +} // namespace bisect +} // namespace mlir + +int main(int argc, char* argv[]) { + llvm::errs().tie(&llvm::outs()); + llvm::outs().tie(&llvm::errs()); + int dummy_argc = 1; + tsl::port::InitMain("", &dummy_argc, &argv); + + Options options; + llvm::cl::ParseCommandLineOptions(argc, argv, "MLIR bisect tool\n"); + + mlir::DialectRegistry registry; + mlir::registerAllDialects(registry); + mlir::registerAllPasses(); + mlir::bisect::test::RegisterTestPasses(); + mlir::mhlo::registerAllMhloPasses(); + mlir::mhlo::registerAllMhloDialects(registry); + + registry.insert(); + + mlir::MLIRContext context(registry); + context.getOrLoadDialect(); + auto module = mlir::bisect::ParseMlirInput(options.input_filename, &context); + + if (!options.hlo_snapshot.empty()) { + xla::HloSnapshot snapshot; + CHECK_OK(tsl::ReadBinaryProto(tsl::Env::Default(), options.hlo_snapshot, + &snapshot)); + mlir::bisect::ReplaceArgsWithConstants(*module, snapshot); + } + + if (options.debug_strategy.empty()) { + llvm::outs() << "initial module:\n" << *module << "\n"; + } + + mlir::interpreter::ExecutionTrace trace; + if (!mlir::bisect::Run(*module, &trace, options).succeeded()) { + llvm::outs() << "Did not find bug in initial module\n"; + if (options.pass_pipeline.hasAnyOccurrences() && + mlir::succeeded(mlir::bisect::RunPipeline(*module, options))) { + llvm::outs() << "Module after running pipeline:\n" << *module << "\n"; + } + return 1; + } + + mlir::bisect::BisectState state; + state.SetTrace(std::move(trace)); + if (!options.debug_strategy.empty()) { + bool some_failed = false; + for (auto& candidate : mlir::bisect::detail::GetCandidates( + mlir::bisect::detail::GetStrategies()[options.debug_strategy], + state, *module)) { + auto new_module = candidate(); + if (!new_module) { + continue; + } + llvm::outs() << *new_module << "\n\n"; + if (!mlir::verify(*new_module).succeeded()) { + some_failed = true; + llvm::errs() << "verification failed\n"; + } + } + return some_failed ? 
1 : 0; + } + + module = mlir::bisect::ReduceModule(std::move(module), state, options); + + llvm::outs() << "Final module:\n" << *module << "\n"; + if (options.pass_pipeline.hasAnyOccurrences() && + mlir::succeeded(mlir::bisect::RunPipeline(*module, options))) { + llvm::outs() << "Final module after running pipeline:\n" << *module << "\n"; + } + return 0; +} diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/BUILD b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/BUILD new file mode 100644 index 00000000000000..18620fa5b880ec --- /dev/null +++ b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/BUILD @@ -0,0 +1,26 @@ +load("@local_tsl//tsl/platform:rules_cc.bzl", "cc_library") + +# copybara:uncomment package(default_applicable_licenses = ["//tensorflow:license"]) + +cc_library( + name = "rewrites", + srcs = [ + "func.cc", + "general.cc", + "scf.cc", + ], + visibility = ["//visibility:public"], + deps = [ + "//xla/mlir/tools/mlir_bisect:bisect_lib", + "//xla/mlir/tools/mlir_replay/public:execution_trace_utils", + "@llvm-project//llvm:Support", + "@llvm-project//mlir:ArithDialect", + "@llvm-project//mlir:DialectUtils", + "@llvm-project//mlir:FuncDialect", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:SCFDialect", + "@llvm-project//mlir:SideEffectInterfaces", + "@llvm-project//mlir:Support", + ], + alwayslink = 1, +) diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/func.cc b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/func.cc new file mode 100644 index 00000000000000..3715f36b825692 --- /dev/null +++ b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/func.cc @@ -0,0 +1,86 @@ +/* Copyright 2023 The OpenXLA Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include +#include + +#include "llvm/ADT/STLExtras.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" // from @llvm-project +#include "mlir/IR/BuiltinAttributes.h" // from @llvm-project +#include "mlir/IR/BuiltinOps.h" // from @llvm-project +#include "mlir/IR/OwningOpRef.h" // from @llvm-project +#include "mlir/IR/Value.h" // from @llvm-project +#include "mlir/IR/ValueRange.h" // from @llvm-project +#include "mlir/Support/LLVM.h" // from @llvm-project +#include "xla/mlir/tools/mlir_bisect/bisect_lib.h" + +namespace mlir { +namespace bisect { +namespace { + +void SetReturnValues(func::FuncOp func, ValueRange values) { + // We only operate on functions without arguments. 
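+  // By the time strategies run, main() is expected to be nullary: the
+  // driver inlines snapshot arguments as constants up front (see
+  // ReplaceArgsWithConstants in mlir_bisect.cc), so only the result types
+  // need to be rewritten here.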
+ func.setFunctionType(mlir::FunctionType::get(func.getContext(), /*inputs=*/{}, + values.getTypes())); + func.getBody().getBlocks().front().getTerminator()->setOperands(values); +} + +SmallVector()>> TruncateFunction( + BisectState&, func::FuncOp func) { + SmallVector()>> result; + for (auto& ret : func.getBody().getBlocks().front().without_terminator()) { + if (func.getBody().getBlocks().front().getTerminator()->getOperands() == + ret.getResults()) { + continue; + } + auto fun = [r = &ret]() -> OwningOpRef { + auto [module, ret_clone] = CloneModuleFor(r); + SetReturnValues(ret_clone->getParentOfType(), + ret_clone->getResults()); + return std::move(module); + }; + result.push_back(fun); + } + return result; +} + +SmallVector()>> +ReturnOperandsOfTerminatorOperands(BisectState&, func::FuncOp func) { + SmallVector()>> result; + result.push_back([func]() -> OwningOpRef { + auto [module, func_clone] = CloneModuleFor(func); + auto* terminator = func_clone.getBody().getBlocks().front().getTerminator(); + SmallVector new_operands; + for (auto operand : terminator->getOperands()) { + if (operand.getDefiningOp()) { + llvm::copy(operand.getDefiningOp()->getOperands(), + std::back_inserter(new_operands)); + } else { + return nullptr; + } + } + SetReturnValues(func_clone, new_operands); + return std::move(module); + }); + return result; +} + +REGISTER_MLIR_REDUCE_STRATEGY(TruncateFunction); +REGISTER_MLIR_REDUCE_STRATEGY(ReturnOperandsOfTerminatorOperands); + +} // namespace +} // namespace bisect +} // namespace mlir diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/general.cc b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/general.cc new file mode 100644 index 00000000000000..e54e81aebeebb8 --- /dev/null +++ b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/general.cc @@ -0,0 +1,194 @@ +/* Copyright 2023 The OpenXLA Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include +#include +#include +#include + +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Casting.h" +#include "mlir/Dialect/Arith/IR/Arith.h" // from @llvm-project +#include "mlir/IR/Builders.h" // from @llvm-project +#include "mlir/IR/BuiltinAttributeInterfaces.h" // from @llvm-project +#include "mlir/IR/BuiltinOps.h" // from @llvm-project +#include "mlir/IR/OpDefinition.h" // from @llvm-project +#include "mlir/IR/Operation.h" // from @llvm-project +#include "mlir/IR/OwningOpRef.h" // from @llvm-project +#include "mlir/IR/Value.h" // from @llvm-project +#include "mlir/Interfaces/SideEffectInterfaces.h" // from @llvm-project +#include "mlir/Support/LLVM.h" // from @llvm-project +#include "xla/mlir/tools/mlir_bisect/bisect_lib.h" +#include "xla/mlir/tools/mlir_replay/public/execution_trace_utils.h" + +namespace mlir { +namespace bisect { +namespace { + +bool IsTerminator(Operation* op) { + return op->hasTrait(); +} + +bool IsTopLevelOp(Operation* op) { + return !op->getBlock()->back().mightHaveTrait(); +} + +SmallVector()>> EraseOpWithoutResults( + BisectState& state, Operation* op) { + // Only erase ops with results if they're unused. + if (op->getNumResults() > 0 && !op->use_empty()) { + return {}; + } + + // Don't erase entire functions, constants, terminators. + if (IsTopLevelOp(op) || IsTerminator(op)) { + return {}; + } + + SmallVector()>> ret; + ret.push_back([op]() { + auto [module, cloned_op] = CloneModuleFor(op); + cloned_op->erase(); + return std::move(module); + }); + return ret; +} + +llvm::SmallVector()>> ReplaceOpWithConstant( + BisectState& state, Operation* op) { + llvm::SmallVector()>> result; + if (op->hasTrait() || IsTopLevelOp(op) || + IsTerminator(op) || op->use_empty() || op->getNumResults() == 0) { + return result; + } + + auto mii = llvm::dyn_cast(op); + if (mii && mii.hasEffect()) { + // Don't replace allocs with constants. + return result; + } + + // Ops that are never executed won't be replaced here, but we have other + // strategies that get rid of them (e.g. deleting the entire region). + for (auto* execution : state.GetExecutions(op)) { + assert(execution->results_size() == op->getNumResults() && + "unexpected number of results"); + + result.push_back([execution, op]() -> OwningOpRef { + auto [module_clone, op_clone] = CloneModuleFor(op); + SmallVector results; + OpBuilder b(op_clone); + for (int64_t i = 0; i < op->getNumResults(); ++i) { + auto type = op->getResultTypes()[i]; + auto value = *interpreter::TracedValueToValue( + execution->results(static_cast(i))); + auto attribute = interpreter::ValueToAttribute(value, type); + // We don't currently support tuples. 
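+        // ValueToAttribute yields one attribute per flattened element; a
+        // result that does not map to exactly one attribute cannot be
+        // materialized as a single arith.constant, so the candidate is
+        // abandoned by returning nullptr (ReduceModule skips null
+        // candidates).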
+ if (attribute.size() != 1) { + return nullptr; + } + op_clone->getResults()[i].replaceAllUsesWith( + b.create( + op_clone->getLoc(), type, + llvm::cast(attribute.front()))); + } + return std::move(module_clone); + }); + } + return result; +} + +llvm::SmallVector()>> +ReplaceOperandWithConstant(BisectState& state, Operation* op) { + llvm::SmallVector()>> result; + if (IsTopLevelOp(op) || op->getNumOperands() == 0) { + return result; + } + + for (auto* execution : state.GetExecutions(op)) { + for (int64_t i = 0; i < op->getNumOperands(); ++i) { + auto operand = op->getOperand(i); + if (operand.getDefiningOp() && + operand.getDefiningOp()->hasTrait()) { + continue; + } + result.push_back([execution, i, op]() -> OwningOpRef { + auto type = op->getOperandTypes()[i]; + auto value = *interpreter::TracedValueToValue( + execution->args(static_cast(i))); + auto attribute = interpreter::ValueToAttribute(value, type); + if (attribute.size() != 1) { + return nullptr; + } + auto [module_clone, op_clone] = CloneModuleFor(op); + OpBuilder b(op_clone); + op_clone->setOperand(i, b.create( + op_clone->getLoc(), type, + llvm::cast(attribute.front()))); + return std::move(module_clone); + }); + } + } + return result; +} + +// Replaces an op's result with some other value with the same type defined +// previously in the same region. +llvm::SmallVector()>> ReplaceOpWithValue( + BisectState&, Operation* op) { + llvm::SmallVector()>> ret; + if (op->hasTrait() || IsTopLevelOp(op) || + IsTerminator(op)) { + return ret; + } + + // TODO(jreiffers): Consider bbargs. + llvm::DenseMap>> + candidates_by_type; + for (auto* pred = op->getPrevNode(); pred != nullptr; + pred = pred->getPrevNode()) { + for (auto [index, result] : llvm::enumerate(pred->getResults())) { + candidates_by_type[result.getType()].emplace_back(pred, index); + } + } + + for (auto [index, result] : llvm::enumerate(op->getResults())) { + if (result.use_empty()) { + continue; + } + + for (auto [new_result_op, new_result_index] : + candidates_by_type[result.getType()]) { + ret.push_back( + [op, i = index, j = new_result_index, result_op = new_result_op]() { + auto [module_clone, op_clone] = CloneModuleFor(op); + op_clone->getResults()[i].replaceAllUsesWith( + FindInClone(result_op, module_clone.get())->getResults()[j]); + return std::move(module_clone); + }); + } + } + return ret; +} + +REGISTER_MLIR_REDUCE_STRATEGY(EraseOpWithoutResults); +REGISTER_MLIR_REDUCE_STRATEGY(ReplaceOpWithConstant); +REGISTER_MLIR_REDUCE_STRATEGY(ReplaceOpWithValue); +REGISTER_MLIR_REDUCE_STRATEGY(ReplaceOperandWithConstant); + +} // namespace +} // namespace bisect +} // namespace mlir diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/scf.cc b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/scf.cc new file mode 100644 index 00000000000000..3445393ff9798b --- /dev/null +++ b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/scf.cc @@ -0,0 +1,144 @@ +/* Copyright 2023 The OpenXLA Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/scf.cc b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/scf.cc
new file mode 100644
index 00000000000000..3445393ff9798b
--- /dev/null
+++ b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/scf.cc
@@ -0,0 +1,144 @@
+/* Copyright 2023 The OpenXLA Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "mlir/Dialect/SCF/IR/SCF.h"  // from @llvm-project
+
+#include <cstdint>  // NOLINT
+#include <functional>  // NOLINT
+#include <utility>  // NOLINT
+
+#include "mlir/Dialect/Utils/StaticValueUtils.h"  // from @llvm-project
+#include "mlir/IR/Builders.h"  // from @llvm-project
+#include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/OpDefinition.h"  // from @llvm-project
+#include "mlir/IR/OwningOpRef.h"  // from @llvm-project
+#include "mlir/IR/PatternMatch.h"  // from @llvm-project
+#include "mlir/IR/Value.h"  // from @llvm-project
+#include "mlir/IR/ValueRange.h"  // from @llvm-project
+#include "mlir/Support/LLVM.h"  // from @llvm-project
+#include "xla/mlir/tools/mlir_bisect/bisect_lib.h"
+
+namespace mlir {
+namespace bisect {
+namespace {
+
+constexpr int64_t kMaxWhileIterations = 1;
+
+// Rewrites a while loop to execute its body a fixed number of times. The
+// condition is executed, but its result is ignored.
+// For ease of implementation, this generates scf.execute_region ops. These are
+// subsequently canonicalized away.
+llvm::SmallVector<std::function<OwningOpRef<ModuleOp>()>> InlineScfWhile(
+    BisectState&, scf::WhileOp while_op) {
+  llvm::SmallVector<std::function<OwningOpRef<ModuleOp>()>> result;
+  for (int64_t num_executions = 0; num_executions <= kMaxWhileIterations;
+       ++num_executions) {
+    using ::mlir::scf::ExecuteRegionOp;
+
+    result.push_back([while_op, num_executions]() -> OwningOpRef<ModuleOp> {
+      auto [module, op] = CloneModuleFor(while_op);
+      OpBuilder b(op);
+      llvm::SmallVector<ExecuteRegionOp> regions;
+
+      auto wrap_region_in_execute = [&,
+                                     loc = op.getLoc()](mlir::Region& region) {
+        regions
+            .emplace_back(b.create<ExecuteRegionOp>(
+                loc,
+                region.getBlocks().front().getTerminator()->getOperandTypes(),
+                mlir::ValueRange{}))
+            .getRegion()
+            .takeBody(region);
+      };
+
+      wrap_region_in_execute(op.getBefore());
+      // Replace the condition terminator with a yield terminator.
+      {
+        auto& before_block = regions[0].getRegion().getBlocks().front();
+        OpBuilder before_builder(before_block.getTerminator());
+        IRRewriter before_rewriter(before_builder);
+        before_rewriter.replaceOpWithNewOp<scf::YieldOp>(
+            before_block.getTerminator(),
+            before_block.getTerminator()->getOperands());
+      }
+
+      // Clone the execute region ops the requested number of times.
+      if (num_executions > 0) {
+        wrap_region_in_execute(op.getAfter());
+        for (int64_t i = 0; i < num_executions - 1; ++i) {
+          b.insert(regions.emplace_back(regions[0].clone()));
+          b.insert(regions.emplace_back(regions[1].clone()));
+        }
+        b.insert(regions.emplace_back(regions[0].clone()));
+      }
+
+      // Rewire region arguments and erase them.
+      for (int64_t i = 0; i < regions.size(); ++i) {
+        auto args = i == 0 ? ValueRange{op.getOperands()}
+                           : ValueRange{regions[i - 1].getResults()};
+        bool is_after_region = (i & 1) == 1;
+        auto& region = regions[i].getRegion();
+        for (int64_t arg = static_cast<int64_t>(region.getNumArguments()) - 1;
+             arg >= 0; --arg) {
+          region.getArgument(arg).replaceAllUsesWith(
+              args[is_after_region ? arg + 1 : arg]);
+          region.eraseArgument(arg);
+        }
+      }
+      op->replaceAllUsesWith(regions.back().getResults().drop_front(1));
+      op->erase();
+      return std::move(module);
+    });
+  }
+  return result;
+}
+
+SmallVector<std::function<OwningOpRef<ModuleOp>()>> ReduceScfForallBounds(
+    BisectState&, scf::ForallOp forall_op) {
+  SmallVector<OpFoldResult> new_upper_bound{forall_op.getMixedUpperBound()};
+  OpBuilder b(forall_op);
+  bool any_replaced = false;
+  for (auto& ub : new_upper_bound) {
+    auto constant_or = mlir::getConstantIntValue(ub);
+    if (!constant_or.has_value()) {
+      continue;
+    }
+    any_replaced = true;
+    ub = b.getIndexAttr(*constant_or - 1);
+  }
+  SmallVector<std::function<OwningOpRef<ModuleOp>()>> result;
+  if (!any_replaced) {
+    return result;
+  }
+  result.push_back([=]() -> OwningOpRef<ModuleOp> {
+    auto [module, op] = CloneModuleFor(forall_op);
+    OpBuilder b(op);
+    SmallVector<Value> dynamic_upper_bound;
+    SmallVector<int64_t> static_upper_bound;
+    dispatchIndexOpFoldResults(new_upper_bound, dynamic_upper_bound,
+                               static_upper_bound);
+    op.getDynamicUpperBoundMutable().assign(dynamic_upper_bound);
+    op.setStaticUpperBound(static_upper_bound);
+    return std::move(module);
+  });
+  return result;
+}
+
+REGISTER_MLIR_REDUCE_STRATEGY(ReduceScfForallBounds);
+REGISTER_MLIR_REDUCE_STRATEGY(InlineScfWhile);
+
+} // namespace
+} // namespace bisect
+} // namespace mlir
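(To make the while-inlining concrete: for num_executions = 0 the loop collapses to a single scf.execute_region that runs the former "before" region once, with scf.condition rewritten into an scf.yield of its operands -- the condition value first, the loop-carried values after it; higher counts chain before/after execute_regions in alternation. A rough sketch of the zero-iteration expansion, prior to canonicalization, with types elided and purely illustrative:

    %r:2 = scf.execute_region -> (i1, i64) {
      %cond = ...              // former scf.condition operand
      scf.yield %cond, %arg0 : i1, i64
    }
    // former uses of the scf.while result now read %r#1

The inline-scf-while.mlir test below checks exactly these shapes.)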
diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/BUILD b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/BUILD
new file mode 100644
index 00000000000000..3cca2e53bb6945
--- /dev/null
+++ b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/BUILD
@@ -0,0 +1,13 @@
+load("//xla:lit.bzl", "lit_test_suite")
+
+# copybara:uncomment package(default_applicable_licenses = ["//tensorflow:license"])
+
+lit_test_suite(
+    name = "all_tests",
+    srcs = glob(["*.mlir"]),
+    cfg = "//xla:lit.cfg.py",
+    tools = [
+        "//xla/mlir/tools/mlir_bisect:mlir-bisect",
+        "@llvm-project//llvm:FileCheck",
+    ],
+)
diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/erase-op-without-results.mlir b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/erase-op-without-results.mlir
new file mode 100644
index 00000000000000..e918e112fe46f3
--- /dev/null
+++ b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/erase-op-without-results.mlir
@@ -0,0 +1,12 @@
+// RUN: mlir-bisect %s --debug-strategy=EraseOpWithoutResults | FileCheck %s
+
+func.func @main() -> memref<i32> {
+  %a = arith.constant 1 : i32
+  %b = memref.alloc() : memref<i32>
+  memref.store %a, %b[] : memref<i32>
+  func.return %b : memref<i32>
+}
+
+// CHECK: func.func @main()
+// CHECK: %[[ALLOC:.*]] = memref.alloc
+// CHECK-NEXT: return %[[ALLOC]]
diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/inline-scf-while.mlir b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/inline-scf-while.mlir
new file mode 100644
index 00000000000000..6c9deddbc37cb5
--- /dev/null
+++ b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/inline-scf-while.mlir
@@ -0,0 +1,40 @@
+// RUN: mlir-bisect %s --debug-strategy=InlineScfWhile | FileCheck %s
+
+func.func @main() -> i64 {
+  %c0 = arith.constant 0 : i64
+  %c1 = arith.constant 1 : i64
+  %c4 = arith.constant 4 : i64
+  %alloc = memref.alloc() : memref<i64>
+  memref.store %c0, %alloc[] : memref<i64>
+  %ret = scf.while(%arg0 = %c0): (i64) -> (i64) {
+    %cond = arith.cmpi slt, %arg0, %c4 : i64
+    scf.condition(%cond) %arg0 : i64
+  } do {
+  ^bb0(%arg1: i64):
+    %add = arith.addi %arg1, %c1 : i64
+    scf.yield %add : i64
+  }
+  return %ret : i64
+}
+
+// CHECK: func @main
+// CHECK-DAG: %[[C0:.*]] = arith.constant 0
+// CHECK-DAG: %[[C4:.*]] = arith.constant 4
+// CHECK: %[[RET:.*]]:2 = scf.execute_region
+// CHECK: arith.cmpi slt, %[[C0]], %[[C4]]
+// CHECK: yield {{.*}}, %[[C0]]
+// CHECK: return %[[RET]]#1
+
+// CHECK: func @main
+// CHECK-DAG: %[[C0:.*]] = arith.constant 0
+// CHECK-DAG: %[[C1:.*]] = arith.constant 1
+// CHECK: %[[BEFORE0:.*]]:2 = scf.execute_region
+// CHECK: arith.cmpi
+// CHECK: yield {{.*}}, %[[C0]]
+// CHECK: %[[AFTER:.*]] = scf.execute_region
+// CHECK: %[[ADD:.*]] = arith.addi %[[BEFORE0]]#1, %[[C1]]
+// CHECK: yield %[[ADD]]
+// CHECK: %[[BEFORE1:.*]]:2 = scf.execute_region
+// CHECK: arith.cmpi
+// CHECK: yield {{.*}}, %[[AFTER]]
+// CHECK: return %[[BEFORE1]]#1
\ No newline at end of file
diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/reduce-scf-forall-bounds.mlir b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/reduce-scf-forall-bounds.mlir
new file mode 100644
index 00000000000000..61f289d3c5cd6c
--- /dev/null
+++ b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/reduce-scf-forall-bounds.mlir
@@ -0,0 +1,16 @@
+// RUN: mlir-bisect %s --debug-strategy=ReduceScfForallBounds | FileCheck %s
+
+func.func @main() -> tensor<8xindex> {
+  %init = tensor.empty() : tensor<8xindex>
+  %iota = scf.forall (%i) = (0) to (8) step (1)
+      shared_outs (%init_ = %init) -> (tensor<8xindex>) {
+    %tensor = tensor.from_elements %i : tensor<1xindex>
+    scf.forall.in_parallel {
+      tensor.parallel_insert_slice %tensor into %init_[%i] [1] [1]
+        : tensor<1xindex> into tensor<8xindex>
+    }
+  }
+  func.return %iota : tensor<8xindex>
+}
+// CHECK: func @main()
+// CHECK: scf.forall ({{.*}}) in (7)
diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/replace-op-with-constant.mlir b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/replace-op-with-constant.mlir
new file mode 100644
index 00000000000000..171472ad733642
--- /dev/null
+++ b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/replace-op-with-constant.mlir
@@ -0,0 +1,26 @@
+// RUN: mlir-bisect %s --debug-strategy=ReplaceOpWithConstant | FileCheck %s
+
+func.func @main() -> tensor<2xi32> {
+  %a = arith.constant dense<3> : tensor<2xi32>
+  %b = arith.constant dense<2> : tensor<2xi32>
+  %c = mhlo.add %a, %b : tensor<2xi32>
+  %d = mhlo.multiply %b, %c : tensor<2xi32>
+  func.return %d : tensor<2xi32>
+}
+
+// CHECK: func.func @main()
+// CHECK-NEXT: arith.constant dense<3>
+// CHECK-NEXT: arith.constant dense<2>
+// CHECK-NEXT: arith.constant dense<5>
+// CHECK-NEXT: %[[ADD:.*]] = mhlo.add
+// CHECK-NOT: %[[ADD]]
+// CHECK-NEXT: mhlo.multiply
+// CHECK-NEXT: return
+
+// CHECK: func.func @main()
+// CHECK-NEXT: arith.constant dense<3>
+// CHECK-NEXT: arith.constant dense<2>
+// CHECK-NEXT: mhlo.add
+// CHECK-NEXT: %[[D:.*]] = arith.constant dense<10>
+// CHECK-NEXT: mhlo.multiply
+// CHECK-NEXT: return %[[D]]
diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/replace-op-with-value.mlir b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/replace-op-with-value.mlir
new file mode 100644
index 00000000000000..f89f647f14ddc6
--- /dev/null
+++ b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/replace-op-with-value.mlir
@@ -0,0 +1,16 @@
+// RUN: mlir-bisect %s --debug-strategy=ReplaceOpWithValue | FileCheck %s
+
+func.func @main() -> (memref<i32>, memref<i32>) {
+  %a = memref.alloc() : memref<i32>
+  %b = memref.alloc() : memref<i32>
+  %c0 = arith.constant 0 : i32
+  memref.store %c0, %b[] : memref<i32>
+  return %a, %b : memref<i32>, memref<i32>
+}
+
+// CHECK: func @main()
+// CHECK: 
%[[ALLOC:.*]] = memref.alloc() +// CHECK-NEXT: memref.alloc +// CHECK-NEXT: constant +// CHECK-NEXT: memref.store {{.*}}, %[[ALLOC]] +// CHECK-NEXT: return %[[ALLOC]], %[[ALLOC]] diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/replace-operand-with-constant.mlir b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/replace-operand-with-constant.mlir new file mode 100644 index 00000000000000..7619a8a500c5e4 --- /dev/null +++ b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/replace-operand-with-constant.mlir @@ -0,0 +1,28 @@ +// RUN: mlir-bisect %s --debug-strategy=ReplaceOperandWithConstant | FileCheck %s + +func.func @main() -> (tensor<2xi32>, tensor<2xi32>) { + %a = arith.constant dense<3> : tensor<2xi32> + %b = arith.constant dense<2> : tensor<2xi32> + %c = mhlo.add %a, %b : tensor<2xi32> + %d = mhlo.multiply %b, %c : tensor<2xi32> + func.return %c, %d : tensor<2xi32>, tensor<2xi32> +} + +// CHECK: func @main() +// CHECK: %[[C2:.*]] = arith.constant dense<2> +// CHECK: %[[ADD:.*]] = mhlo.add +// CHECK: %[[C5:.*]] = arith.constant dense<5> +// CHECK: %[[MUL:.*]] = mhlo.multiply %[[C2]], %[[C5]] : tensor<2xi32> +// CHECK: return %[[ADD]], %[[MUL]] + +// CHECK: func @main() +// CHECK: mhlo.add +// CHECK: %[[MUL:.*]] = mhlo.multiply %cst_0, %0 : tensor<2xi32> +// CHECK: %[[C5:.*]] = arith.constant dense<5> +// CHECK: return %[[C5]], %[[MUL]] + +// CHECK: func @main() +// CHECK: %[[ADD:.*]] = mhlo.add +// CHECK: mhlo.multiply +// CHECK: %[[C10:.*]] = arith.constant dense<10> +// CHECK: return %[[ADD]], %[[C10]] diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/return-operands-of-terminator-operands.mlir b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/return-operands-of-terminator-operands.mlir new file mode 100644 index 00000000000000..8584e2a0008fa0 --- /dev/null +++ b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/return-operands-of-terminator-operands.mlir @@ -0,0 +1,15 @@ +// RUN: mlir-bisect %s --debug-strategy=ReturnOperandsOfTerminatorOperands | FileCheck %s + +func.func @main() -> tensor<2xi32> { + %a = arith.constant dense<3> : tensor<2xi32> + %b = arith.constant dense<2> : tensor<2xi32> + %c = mhlo.add %a, %b : tensor<2xi32> + %d = mhlo.multiply %b, %c : tensor<2xi32> + func.return %d : tensor<2xi32> +} + +// CHECK: @main +// CHECK: %[[C2:.*]] = arith.constant dense<2> +// CHECK: %[[ADD:.*]] = mhlo.add +// CHECK: mhlo.multiply +// CHECK: return %[[C2]], %[[ADD]] \ No newline at end of file diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/truncate-function.mlir b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/truncate-function.mlir new file mode 100644 index 00000000000000..af06778bd47c54 --- /dev/null +++ b/third_party/xla/xla/mlir/tools/mlir_bisect/rewrites/tests/truncate-function.mlir @@ -0,0 +1,31 @@ +// RUN: mlir-bisect %s --debug-strategy=TruncateFunction | FileCheck %s + +// Function to prevent constant folding below. 
+func.func private @cst() -> tensor<2xi32> {
+  %cst = arith.constant dense<2> : tensor<2xi32>
+  return %cst : tensor<2xi32>
+}
+
+func.func @main() -> tensor<2xi32> {
+  %a = arith.constant dense<1> : tensor<2xi32>
+  %b = func.call @cst() : () -> tensor<2xi32>
+  %c = mhlo.add %a, %b : tensor<2xi32>
+  %d = mhlo.multiply %b, %c : tensor<2xi32>
+  func.return %d : tensor<2xi32>
+}
+
+// CHECK: func @main()
+// CHECK: %[[A:.*]] = arith.constant dense<1>
+// CHECK: return %[[A]]
+
+// CHECK: func @main()
+// CHECK: %[[B:.*]] = call @cst()
+// CHECK: return %[[B]]
+
+// CHECK: func @main()
+// CHECK: %[[A:.*]] = arith.constant dense<1>
+// CHECK: %[[B:.*]] = call @cst()
+// CHECK: %[[ADD:.*]] = mhlo.add
+// CHECK-DAG: %[[A]]
+// CHECK-DAG: %[[B]]
+// CHECK: return %[[ADD]]
diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/test_passes.cc b/third_party/xla/xla/mlir/tools/mlir_bisect/test_passes.cc
new file mode 100644
index 00000000000000..a48b6a0b8cc2e9
--- /dev/null
+++ b/third_party/xla/xla/mlir/tools/mlir_bisect/test_passes.cc
@@ -0,0 +1,48 @@
+/* Copyright 2023 The OpenXLA Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "xla/mlir/tools/mlir_bisect/test_passes.h"
+
+#include "mlir/Dialect/Linalg/IR/Linalg.h"  // from @llvm-project
+#include "mlir/Pass/Pass.h"  // from @llvm-project
+
+namespace mlir {
+namespace bisect {
+namespace test {
+namespace {
+
+struct BreakLinalgTransposePass
+    : public PassWrapper<BreakLinalgTransposePass, OperationPass<ModuleOp>> {
+  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(BreakLinalgTransposePass)
+
+  StringRef getArgument() const final { return "test-break-linalg-transpose"; }
+  StringRef getDescription() const final { return "breaks linalg transpose"; }
+  BreakLinalgTransposePass() = default;
+
+  void runOnOperation() override {
+    getOperation().walk([](linalg::TransposeOp op) {
+      auto permutation = llvm::to_vector(op.getPermutation());
+      std::swap(permutation[0], permutation[1]);
+      op.setPermutation(permutation);
+    });
+  }
+};
+}  // namespace
+
+void RegisterTestPasses() {
+  PassRegistration<BreakLinalgTransposePass>();
+}
+
+}  // namespace test
+}  // namespace bisect
+}  // namespace mlir
diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/test_passes.h b/third_party/xla/xla/mlir/tools/mlir_bisect/test_passes.h
new file mode 100644
index 00000000000000..b90f7e6b86529d
--- /dev/null
+++ b/third_party/xla/xla/mlir/tools/mlir_bisect/test_passes.h
@@ -0,0 +1,29 @@
+/* Copyright 2023 The OpenXLA Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef XLA_MLIR_TOOLS_MLIR_BISECT_TEST_PASSES_H_ +#define XLA_MLIR_TOOLS_MLIR_BISECT_TEST_PASSES_H_ + +namespace mlir { +namespace bisect { +namespace test { + +void RegisterTestPasses(); + +} +} // namespace bisect +} // namespace mlir + +#endif // XLA_MLIR_TOOLS_MLIR_BISECT_TEST_PASSES_H_ diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/tests/BUILD b/third_party/xla/xla/mlir/tools/mlir_bisect/tests/BUILD new file mode 100644 index 00000000000000..5fb8b35f77d82a --- /dev/null +++ b/third_party/xla/xla/mlir/tools/mlir_bisect/tests/BUILD @@ -0,0 +1,17 @@ +load("//xla:lit.bzl", "lit_test_suite") + +# copybara:uncomment package(default_applicable_licenses = ["//tensorflow:license"]) + +lit_test_suite( + name = "all_tests", + srcs = glob(["*.mlir"]), + cfg = "//xla:lit.cfg.py", + data = [ + "snapshot.mlir.pb", + ], + tools = [ + "//xla/mlir/tools/mlir_bisect:mlir-bisect", + "@llvm-project//llvm:FileCheck", + "@llvm-project//llvm:not", + ], +) diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/tests/bisect.mlir b/third_party/xla/xla/mlir/tools/mlir_bisect/tests/bisect.mlir new file mode 100644 index 00000000000000..ca839d982c416a --- /dev/null +++ b/third_party/xla/xla/mlir/tools/mlir_bisect/tests/bisect.mlir @@ -0,0 +1,46 @@ +// RUN: mlir-bisect %s \ +// RUN: --pass-pipeline="builtin.module(test-break-linalg-transpose)" \ +// RUN: --max-steps-per-run=200 \ +// RUN: | FileCheck %s + +func.func @main() -> (memref<2x2xindex>, memref<2x2xindex>) { + %a = memref.alloc() : memref<2x2xindex> + %b = memref.alloc() : memref<2x2xindex> + %c = memref.alloc() : memref<2x2xindex> + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c2 = arith.constant 2 : index + %c3 = arith.constant 3 : index + scf.for %i = %c0 to %c2 step %c1 { + scf.for %j = %c0 to %c2 step %c1 { + memref.store %i, %a[%i, %j] : memref<2x2xindex> + memref.store %j, %b[%i, %j] : memref<2x2xindex> + } + } + + %i = scf.while: () -> (index) { + %value = memref.load %a[%c0, %c0] : memref<2x2xindex> + %cond = arith.cmpi slt, %value, %c3 : index + scf.condition(%cond) %value : index + } do { + ^bb0(%_: index): + %value = memref.load %a[%c0, %c0] : memref<2x2xindex> + %add = arith.addi %value, %c1 : index + memref.store %add, %a[%c0, %c0] : memref<2x2xindex> + linalg.transpose ins(%b : memref<2x2xindex>) outs(%c : memref<2x2xindex>) + permutation = [1, 0] + memref.copy %c, %b : memref<2x2xindex> to memref<2x2xindex> + scf.yield + } + + return %a, %b : memref<2x2xindex>, memref<2x2xindex> +} + +// CHECK: Final module +// CHECK: func @main() -> memref<2x2xindex> { +// CHECK-NOT: scf.while +// CHECK-NOT: scf.for +// CHECK: linalg.transpose {{.*}} permutation = [1, 0] + +// CHECK: Final module after running pipeline +// CHECK: linalg.transpose {{.*}} permutation = [0, 1] diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/tests/no-bug.mlir b/third_party/xla/xla/mlir/tools/mlir_bisect/tests/no-bug.mlir new file mode 100644 index 00000000000000..df343f3bf8b09f --- /dev/null +++ b/third_party/xla/xla/mlir/tools/mlir_bisect/tests/no-bug.mlir @@ -0,0 +1,10 @@ +// RUN: not mlir-bisect %s \ +// RUN: --pass-pipeline="builtin.module(test-break-linalg-transpose)" \ +// RUN: | FileCheck %s + +func.func @main() -> memref<2x2xindex> { + %a = memref.alloc() : memref<2x2xindex> + return %a : memref<2x2xindex> +} + +// CHECK: Did not find bug in 
initial module diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/tests/snapshot.mlir b/third_party/xla/xla/mlir/tools/mlir_bisect/tests/snapshot.mlir new file mode 100644 index 00000000000000..916ca47ab0fd8e --- /dev/null +++ b/third_party/xla/xla/mlir/tools/mlir_bisect/tests/snapshot.mlir @@ -0,0 +1,12 @@ +// RUN: not mlir-bisect %s --hlo-snapshot=%s.pb \ +// RUN: --pass-pipeline="builtin.module(test-break-linalg-transpose)" \ +// RUN: | FileCheck %s + +func.func @main(%a: tensor<3x1xi32>, %b: tensor<3x1xi32>) -> tensor<3x1xi32> { + return %a : tensor<3x1xi32> +} + +// CHECK: initial module +// CHECK: func @main() -> tensor<3x1xi32> { +// CHECK{LITERAL}: arith.constant dense<[[2], [-4], [5]]> : tensor<3x1xi32> +// CHECK{LITERAL}: arith.constant dense<[[0], [7], [-5]]> : tensor<3x1xi32> diff --git a/third_party/xla/xla/mlir/tools/mlir_bisect/tests/snapshot.mlir.pb b/third_party/xla/xla/mlir/tools/mlir_bisect/tests/snapshot.mlir.pb new file mode 100644 index 0000000000000000000000000000000000000000..ee3c8f759494db153cd7114783124b1cb7fb5da0 GIT binary patch literal 68 scmWeq;1UpEkz!(I)MDXcVq`F4Vqj3>VfynQ3K&_1u&Q8S{|#3H00LzcG5`Po literal 0 HcmV?d00001 diff --git a/third_party/xla/xla/mlir/tools/mlir_interpreter/BUILD b/third_party/xla/xla/mlir/tools/mlir_interpreter/BUILD new file mode 100644 index 00000000000000..87d94287d3285a --- /dev/null +++ b/third_party/xla/xla/mlir/tools/mlir_interpreter/BUILD @@ -0,0 +1,40 @@ +load("@bazel_skylib//rules:build_test.bzl", "build_test") +load("//xla:xla.bzl", "xla_cc_binary") + +package( + # copybara:uncomment default_applicable_licenses = ["//tensorflow:license"], + default_visibility = [":friends"], + licenses = ["notice"], +) + +package_group( + name = "friends", + includes = [ + "//xla:friends", + ], +) + +build_test( + name = "mlir-interpreter-runner_build_test", + targets = [ + ":mlir-interpreter-runner", + ], +) + +xla_cc_binary( + name = "mlir-interpreter-runner", + testonly = True, + srcs = ["mlir_interpreter_runner.cc"], + visibility = ["//visibility:public"], + deps = [ + "//xla/mlir/tools/mlir_interpreter/dialects", + "//xla/mlir/tools/mlir_interpreter/framework", + "//xla/mlir_hlo:hlo_dialect_registration", + "//xla/mlir_hlo:mhlo_passes", + "@llvm-project//llvm:Support", + "@llvm-project//mlir:AllPassesAndDialects", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:ParseUtilities", + "@llvm-project//mlir:Support", + ], +) diff --git a/third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/BUILD b/third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/BUILD new file mode 100644 index 00000000000000..e6f5d71d0e19f6 --- /dev/null +++ b/third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/BUILD @@ -0,0 +1,67 @@ +load("@local_tsl//tsl/platform:rules_cc.bzl", "cc_library") + +package( + # copybara:uncomment default_applicable_licenses = ["//tensorflow:license"], + default_visibility = [":friends"], + licenses = ["notice"], +) + +package_group( + name = "friends", + includes = [ + "//xla:friends", + ], +) + +cc_library( + name = "dialects", + srcs = glob( + [ + "*.cc", + ], + exclude = ["util.cc"], + ), + deps = [ + ":dialect_utils", + "//xla/mlir/tools/mlir_interpreter/framework", + "//xla/mlir_hlo", + "@llvm-project//llvm:Support", + "@llvm-project//mlir:AffineDialect", + "@llvm-project//mlir:AffineUtils", + "@llvm-project//mlir:ArithDialect", + "@llvm-project//mlir:BufferizationDialect", + "@llvm-project//mlir:ComplexDialect", + "@llvm-project//mlir:FuncDialect", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:LinalgDialect", 
+ "@llvm-project//mlir:MemRefDialect", + "@llvm-project//mlir:SCFDialect", + "@llvm-project//mlir:Support", + "@llvm-project//mlir:TensorDialect", + "@llvm-project//mlir:VectorDialect", + ], + alwayslink = 1, +) + +cc_library( + name = "dialect_utils", + srcs = [ + "util.cc", + ], + hdrs = [ + "comparators.h", + "cwise_math.h", + "util.h", + ], + deps = [ + "//xla/mlir/tools/mlir_interpreter/framework", + "@com_google_absl//absl/strings", + "@llvm-project//llvm:Support", + "@llvm-project//mlir:ArithDialect", + "@llvm-project//mlir:FuncDialect", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Support", + "@llvm-project//mlir:TensorDialect", + "@llvm-project//mlir:ViewLikeInterface", + ], +) diff --git a/third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/affine.cc b/third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/affine.cc new file mode 100644 index 00000000000000..3b952f5c4e54db --- /dev/null +++ b/third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/affine.cc @@ -0,0 +1,53 @@ +/* Copyright 2022 The OpenXLA Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include + +#include "llvm/ADT/SmallVector.h" +#include "mlir/Dialect/Affine/IR/AffineOps.h" // from @llvm-project +#include "mlir/Support/LLVM.h" // from @llvm-project +#include "xla/mlir/tools/mlir_interpreter/dialects/util.h" +#include "xla/mlir/tools/mlir_interpreter/framework/interpreter.h" +#include "xla/mlir/tools/mlir_interpreter/framework/registration.h" + +namespace mlir { +namespace interpreter { +namespace { + +llvm::SmallVector Apply(InterpreterState&, affine::AffineApplyOp op, + ArrayRef operands) { + return EvalAffineMap(op.getAffineMap(), operands); +} + +int64_t Min(InterpreterState&, affine::AffineMinOp op, + ArrayRef operands) { + auto results = EvalAffineMap(op.getAffineMap(), operands); + return *std::min_element(results.begin(), results.end()); +} + +int64_t Max(InterpreterState&, affine::AffineMaxOp op, + ArrayRef operands) { + auto results = EvalAffineMap(op.getAffineMap(), operands); + return *std::max_element(results.begin(), results.end()); +} + +REGISTER_MLIR_INTERPRETER_OP(Apply); +REGISTER_MLIR_INTERPRETER_OP(Max); +REGISTER_MLIR_INTERPRETER_OP(Min); + +} // namespace +} // namespace interpreter +} // namespace mlir diff --git a/third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/arith.cc b/third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/arith.cc new file mode 100644 index 00000000000000..17fd94c3019141 --- /dev/null +++ b/third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/arith.cc @@ -0,0 +1,311 @@ +/* Copyright 2022 The OpenXLA Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
diff --git a/third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/arith.cc b/third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/arith.cc
new file mode 100644
index 00000000000000..17fd94c3019141
--- /dev/null
+++ b/third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/arith.cc
@@ -0,0 +1,311 @@
+/* Copyright 2022 The OpenXLA Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "mlir/Dialect/Arith/IR/Arith.h"  // from @llvm-project
+
+#include <cstdint>  // NOLINT
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "mlir/IR/Builders.h"  // from @llvm-project
+#include "mlir/IR/BuiltinAttributes.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypeInterfaces.h"  // from @llvm-project
+#include "mlir/IR/Types.h"  // from @llvm-project
+#include "mlir/Support/LLVM.h"  // from @llvm-project
+#include "xla/mlir/tools/mlir_interpreter/dialects/comparators.h"
+#include "xla/mlir/tools/mlir_interpreter/dialects/cwise_math.h"
+#include "xla/mlir/tools/mlir_interpreter/framework/interpreter.h"
+#include "xla/mlir/tools/mlir_interpreter/framework/interpreter_value.h"
+#include "xla/mlir/tools/mlir_interpreter/framework/interpreter_value_util.h"
+#include "xla/mlir/tools/mlir_interpreter/framework/registration.h"
+#include "xla/mlir/tools/mlir_interpreter/framework/tensor_or_memref.h"
+
+namespace mlir {
+namespace interpreter {
+namespace {
+
+InterpreterValue Bitcast(InterpreterState&, arith::BitcastOp op,
+                         const InterpreterValue& in) {
+  Type ty = op->getResultTypes()[0];
+  auto shaped_ty = dyn_cast<ShapedType>(ty);
+  auto result = DispatchScalarType(ty, [&](auto dummy) -> InterpreterValue {
+    TensorOrMemref<decltype(dummy)> result;
+    result.view = {};
+    if (shaped_ty) {
+      result.buffer = in.Clone().Buffer();
+    } else {
+      result.buffer = in.AsUnitTensor().Buffer();
+    }
+    return {result};
+  });
+  if (!shaped_ty) {
+    return result.ExtractElement({});
+  }
+  auto& outView = result.View();
+  outView.strides = BufferView::GetDefaultStrides(shaped_ty.getShape());
+  outView.sizes = llvm::to_vector(shaped_ty.getShape());
+  return result;
+}
+
+InterpreterValue Constant(InterpreterState&, arith::ConstantOp constant) {
+  auto ty = constant->getResultTypes()[0];
+  auto shaped_ty = dyn_cast<ShapedType>(ty);
+  auto elem_ty = shaped_ty ? shaped_ty.getElementType() : ty;
+  return DispatchScalarType(elem_ty, [&](auto dummy) -> InterpreterValue {
+    using T = decltype(dummy);
+    if (shaped_ty) {
+      auto values =
+          cast<DenseElementsAttr>(constant.getValue()).getValues<T>();
+      auto result = TensorOrMemref<T>::Empty(shaped_ty.getShape());
+      auto valueIt = values.begin();
+      result.view.is_vector = isa<VectorType>(shaped_ty);
+      for (const auto& index : result.view.Indices(true)) {
+        result.at(index) = *valueIt;
+        ++valueIt;
+      }
+      return {result};
+    }
+
+    auto value = constant.getValue();
+    if (auto integer = value.dyn_cast<IntegerAttr>()) {
+      return {static_cast<T>(integer.getInt())};
+    }
+    if (auto floatValue = value.dyn_cast<FloatAttr>()) {
+      return {static_cast<T>(floatValue.getValueAsDouble())};
+    }
+
+    llvm_unreachable("unsupported constant type");
+  });
+}
+
+template <typename Op>
+InterpreterValue IntCast(InterpreterState&, Op op,
+                         const InterpreterValue& arg) {
+  if (arg.IsTensor()) {
+    return DispatchScalarType(
+        op->getResultTypes()[0], [&](auto dummy) -> InterpreterValue {
+          auto result = TensorOrMemref<decltype(dummy)>::EmptyLike(arg.View());
+          for (const auto& index : result.view.Indices()) {
+            result.at(index) = static_cast<decltype(dummy)>(
+                arg.ExtractElement(index).AsInt());
+          }
+          return {result};
+        });
+  }
+
+  return DispatchScalarType(
+      op->getResultTypes()[0], [&](auto dummy) -> InterpreterValue {
+        return {static_cast<decltype(dummy)>(arg.AsInt())};
+      });
+}
+
+template <typename Op>
+InterpreterValue FloatCast(InterpreterState&, Op op,
+                           const InterpreterValue& arg) {
+  if (arg.IsTensor()) {
+    return DispatchScalarType(
+        op->getResultTypes()[0], [&](auto dummy) -> InterpreterValue {
+          auto result = TensorOrMemref<decltype(dummy)>::EmptyLike(arg.View());
+          for (const auto& index : result.view.Indices()) {
+            result.at(index) = static_cast<decltype(dummy)>(
+                arg.ExtractElement(index).AsDouble());
+          }
+          return {result};
+        });
+  }
+
+  return DispatchScalarType(
+      op->getResultTypes()[0], [&](auto dummy) -> InterpreterValue {
+        return {static_cast<decltype(dummy)>(arg.AsDouble())};
+      });
+}
+
+llvm::SmallVector<InterpreterValue> UiToFP(
+    MutableArrayRef<InterpreterValue> args, mlir::Operation* op,
+    InterpreterState&) {
+  if (args[0].IsTensor()) {
+    auto ty = op->getResultTypes()[0].cast<ShapedType>();
+    return {DispatchScalarType(
+        ty.getElementType(), [&](auto dummy) -> InterpreterValue {
+          auto result =
+              TensorOrMemref<decltype(dummy)>::EmptyLike(args[0].View());
+          for (const auto& index : result.view.Indices()) {
+            result.at(index) = static_cast<decltype(dummy)>(
+                args[0].ExtractElement(index).AsUInt());
+          }
+          return {result};
+        })};
+  }
+
+  return {DispatchScalarType(
+      op->getResultTypes()[0], [&](auto dummy) -> InterpreterValue {
+        return {static_cast<decltype(dummy)>(args[0].AsUInt())};
+      })};
+}
+
+InterpreterValue CmpI(InterpreterState&, arith::CmpIOp compare,
+                      const InterpreterValue& lhs,
+                      const InterpreterValue& rhs) {
+  switch (compare.getPredicate()) {
+    case arith::CmpIPredicate::eq:
+      return ApplyCwiseBinaryMap<Foeq>(lhs, rhs);
+    case arith::CmpIPredicate::ne:
+      return ApplyCwiseBinaryMap<Fone>(lhs, rhs);
+    case arith::CmpIPredicate::slt:
+      return ApplyCwiseBinaryMap<Folt>(lhs, rhs);
+    case arith::CmpIPredicate::sle:
+      return ApplyCwiseBinaryMap<Fole>(lhs, rhs);
+    case arith::CmpIPredicate::sgt:
+      return ApplyCwiseBinaryMap<Fogt>(lhs, rhs);
+    case arith::CmpIPredicate::sge:
+      return ApplyCwiseBinaryMap<Foge>(lhs, rhs);
+    case arith::CmpIPredicate::ult:
+      return ApplyCwiseBinaryMap<Iult>(lhs, rhs);
+    case arith::CmpIPredicate::ule:
+      return ApplyCwiseBinaryMap<Iule>(lhs, rhs);
+    case arith::CmpIPredicate::ugt:
+      return ApplyCwiseBinaryMap<Iugt>(lhs, rhs);
+    case arith::CmpIPredicate::uge:
+      return ApplyCwiseBinaryMap<Iuge>(lhs, rhs);
+  }
+}
+
+template <bool value>
+struct ConstFunctor : CwiseAll {
+  template <typename T>
+  static bool Apply(T, T) {
+    return value;
+  }
+};
+
+InterpreterValue CmpF(InterpreterState&, arith::CmpFOp compare,
+                      const InterpreterValue& lhs,
+                      const InterpreterValue& rhs) {
+  switch (compare.getPredicate()) {
+    case arith::CmpFPredicate::AlwaysFalse:
+      return ApplyCwiseBinaryMap<ConstFunctor<false>>(lhs, rhs);
+    case arith::CmpFPredicate::OEQ:
+      return ApplyCwiseBinaryMap<Foeq>(lhs, rhs);
+    case arith::CmpFPredicate::OGT:
+      return ApplyCwiseBinaryMap<Fogt>(lhs, rhs);
+    case arith::CmpFPredicate::OGE:
+      return ApplyCwiseBinaryMap<Foge>(lhs, rhs);
+    case arith::CmpFPredicate::OLT:
+      return ApplyCwiseBinaryMap<Folt>(lhs, rhs);
+    case arith::CmpFPredicate::OLE:
+      return ApplyCwiseBinaryMap<Fole>(lhs, rhs);
+    case arith::CmpFPredicate::ONE:
+      return ApplyCwiseBinaryMap<Fone>(lhs, rhs);
+    case arith::CmpFPredicate::ORD:
+      return ApplyCwiseBinaryMap<Ford>(lhs, rhs);
+    case arith::CmpFPredicate::UEQ:
+      return ApplyCwiseBinaryMap<Fueq>(lhs, rhs);
+    case arith::CmpFPredicate::UGT:
+      return ApplyCwiseBinaryMap<Fugt>(lhs, rhs);
+    case arith::CmpFPredicate::UGE:
+      return ApplyCwiseBinaryMap<Fuge>(lhs, rhs);
+    case arith::CmpFPredicate::ULT:
+      return ApplyCwiseBinaryMap<Fult>(lhs, rhs);
+    case arith::CmpFPredicate::ULE:
+      return ApplyCwiseBinaryMap<Fule>(lhs, rhs);
+    case arith::CmpFPredicate::UNE:
+      return ApplyCwiseBinaryMap<Fune>(lhs, rhs);
+    case arith::CmpFPredicate::UNO:
+      return ApplyCwiseBinaryMap<Funo>(lhs, rhs);
+    case arith::CmpFPredicate::AlwaysTrue:
+      return ApplyCwiseBinaryMap<ConstFunctor<true>>(lhs, rhs);
+  }
+}
+
+InterpreterValue Select(InterpreterState& state, arith::SelectOp,
+                        const InterpreterValue& cond,
+                        const InterpreterValue& trueValue,
+                        const InterpreterValue& falseValue) {
+  if (std::holds_alternative<bool>(cond.storage)) {
+    return std::get<bool>(cond.storage) ? trueValue : falseValue;
+  }
+
+  if (!cond.IsTensor() && !cond.View().is_vector) {
+    state.AddFailure("select requires a scalar or vector argument");
+    return {};
+  }
+
+  auto ret = trueValue.Clone();
+  for (const auto& index : cond.View().Indices(/*include_vector_dims=*/true)) {
+    if (cond.ExtractElement(index).AsInt() == 0) {
+      ret.InsertElement(index, falseValue.ExtractElement(index));
+    }
+  }
+  return ret;
+}
+
+template <typename R>
+struct ExtFFunctor : CwiseFloat {
+  template <typename A>
+  static R Apply(A v) {
+    return v;
+  }
+};
+
+InterpreterValue ExtF(InterpreterState&, arith::ExtFOp op,
+                      const InterpreterValue& in) {
+  return DispatchScalarType(
+      op->getResultTypes()[0], [&](auto dummy) -> InterpreterValue {
+        return ApplyCwiseMap<ExtFFunctor<decltype(dummy)>>(in);
+      });
+}
+
+REGISTER_MLIR_INTERPRETER_OP("arith.addf", ApplyCwiseBinaryMap<Plus>);
+REGISTER_MLIR_INTERPRETER_OP("arith.andi", ApplyCwiseBinaryMap<BitAnd>);
+REGISTER_MLIR_INTERPRETER_OP("arith.divf", ApplyCwiseBinaryMap<Divide>);
+REGISTER_MLIR_INTERPRETER_OP("arith.extui", UiToFP);
+REGISTER_MLIR_INTERPRETER_OP("arith.maxf", ApplyCwiseBinaryMap<Max>);
+REGISTER_MLIR_INTERPRETER_OP("arith.minf", ApplyCwiseBinaryMap<Min>);
+REGISTER_MLIR_INTERPRETER_OP("arith.mulf", ApplyCwiseBinaryMap<Multiply>);
+REGISTER_MLIR_INTERPRETER_OP("arith.negf", ApplyCwiseMap<Neg>);
+REGISTER_MLIR_INTERPRETER_OP("arith.ori", ApplyCwiseBinaryMap<BitOr>);
+REGISTER_MLIR_INTERPRETER_OP("arith.remf", ApplyCwiseBinaryMap<Remainder>);
+REGISTER_MLIR_INTERPRETER_OP("arith.subf", ApplyCwiseBinaryMap<Minus>);
+REGISTER_MLIR_INTERPRETER_OP("arith.uitofp", UiToFP);
+REGISTER_MLIR_INTERPRETER_OP("arith.xori", ApplyCwiseBinaryMap<BitXor>);
+REGISTER_MLIR_INTERPRETER_OP("arith.shrui",
+                             ApplyCwiseBinaryMap<ShiftRightLogical>);
+REGISTER_MLIR_INTERPRETER_OP("arith.shrsi",
+                             ApplyCwiseBinaryMap<ShiftRightArith>);
+REGISTER_MLIR_INTERPRETER_OP("arith.shli", ApplyCwiseBinaryMap<ShiftLeft>);
+
+// The float implementations support ints too.
+REGISTER_MLIR_INTERPRETER_OP("arith.addi", "arith.addf"); +REGISTER_MLIR_INTERPRETER_OP("arith.divsi", "arith.divf"); +REGISTER_MLIR_INTERPRETER_OP("arith.maxsi", "arith.maxf"); +REGISTER_MLIR_INTERPRETER_OP("arith.minsi", "arith.minf"); +REGISTER_MLIR_INTERPRETER_OP("arith.muli", "arith.mulf"); +REGISTER_MLIR_INTERPRETER_OP("arith.remsi", "arith.remf"); +REGISTER_MLIR_INTERPRETER_OP("arith.subi", "arith.subf"); + +REGISTER_MLIR_INTERPRETER_OP(Bitcast); +REGISTER_MLIR_INTERPRETER_OP(CmpF); +REGISTER_MLIR_INTERPRETER_OP(CmpI); +REGISTER_MLIR_INTERPRETER_OP(Constant); +REGISTER_MLIR_INTERPRETER_OP(ExtF); +REGISTER_MLIR_INTERPRETER_OP(FloatCast); +REGISTER_MLIR_INTERPRETER_OP(IntCast); +REGISTER_MLIR_INTERPRETER_OP(IntCast); +REGISTER_MLIR_INTERPRETER_OP(IntCast); +REGISTER_MLIR_INTERPRETER_OP(IntCast); +REGISTER_MLIR_INTERPRETER_OP(Select); + +} // namespace +} // namespace interpreter +} // namespace mlir diff --git a/third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/bufferization.cc b/third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/bufferization.cc new file mode 100644 index 00000000000000..1561d54b4b31eb --- /dev/null +++ b/third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/bufferization.cc @@ -0,0 +1,70 @@ +/* Copyright 2022 The OpenXLA Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
diff --git a/third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/bufferization.cc b/third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/bufferization.cc
new file mode 100644
index 00000000000000..1561d54b4b31eb
--- /dev/null
+++ b/third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/bufferization.cc
@@ -0,0 +1,70 @@
+/* Copyright 2022 The OpenXLA Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "mlir/Dialect/Bufferization/IR/Bufferization.h"  // from @llvm-project
+
+#include <algorithm>  // NOLINT
+#include <optional>  // NOLINT
+
+#include "xla/mlir/tools/mlir_interpreter/dialects/util.h"
+#include "xla/mlir/tools/mlir_interpreter/framework/interpreter.h"
+#include "xla/mlir/tools/mlir_interpreter/framework/interpreter_value.h"
+#include "xla/mlir/tools/mlir_interpreter/framework/registration.h"
+
+namespace mlir {
+namespace interpreter {
+namespace {
+
+InterpreterValue ToTensor(InterpreterState&, bufferization::ToTensorOp,
+                          const InterpreterValue& in) {
+  return in.Clone();
+}
+
+InterpreterValue ToMemref(InterpreterState&, bufferization::ToMemrefOp,
+                          const InterpreterValue& in) {
+  return in;
+}
+
+InterpreterValue AllocTensor(
+    InterpreterState&, bufferization::AllocTensorOp alloc,
+    ArrayRef<int64_t> dynamic_sizes, std::optional<InterpreterValue> copy,
+    const std::optional<InterpreterValue>& /*sizeHint*/) {
+  auto ty = alloc->getResultTypes().front().cast<ShapedType>();
+  auto shape = ReplaceDynamicVals(ty.getShape(), dynamic_sizes);
+
+  if (copy) {
+    return copy->Clone();
+  }
+  return InterpreterValue::MakeTensor(ty.getElementType(), shape);
+}
+
+InterpreterValue Clone(InterpreterState& state, bufferization::CloneOp,
+                       const InterpreterValue& in) {
+  if (auto* stats = state.GetOptions().stats) {
+    stats->heap_size += in.Buffer()->GetByteSize();
+    stats->peak_heap_size = std::max(stats->peak_heap_size, stats->heap_size);
+    ++stats->num_allocations;
+  }
+  return in.Clone();
+}
+
+REGISTER_MLIR_INTERPRETER_OP(AllocTensor);
+REGISTER_MLIR_INTERPRETER_OP(Clone);
+REGISTER_MLIR_INTERPRETER_OP(ToMemref);
+REGISTER_MLIR_INTERPRETER_OP(ToTensor);
+
+} // namespace
+} // namespace interpreter
+} // namespace mlir
diff --git a/third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/builtin.cc b/third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/builtin.cc
new file mode 100644
index 00000000000000..48b8008dc07cfc
--- /dev/null
+++ b/third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/builtin.cc
@@ -0,0 +1,64 @@
+/* Copyright 2022 The OpenXLA Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "mlir/IR/BuiltinTypeInterfaces.h"  // from @llvm-project
+#include "mlir/IR/Operation.h"  // from @llvm-project
+#include "mlir/IR/TypeUtilities.h"  // from @llvm-project
+#include "mlir/Support/LLVM.h"  // from @llvm-project
+#include "xla/mlir/tools/mlir_interpreter/framework/interpreter.h"
+#include "xla/mlir/tools/mlir_interpreter/framework/interpreter_value.h"
+#include "xla/mlir/tools/mlir_interpreter/framework/registration.h"
+#include "xla/mlir/tools/mlir_interpreter/framework/tensor_or_memref.h"
+
+namespace mlir {
+namespace interpreter {
+namespace {
+
+llvm::SmallVector<InterpreterValue> UnrealizedConversionCast(
+    MutableArrayRef<InterpreterValue> args, mlir::Operation* op,
+    InterpreterState&) {
+  auto result_ty = op->getResultTypes()[0];
+  auto operand_ty = op->getOperandTypes()[0];
+  if (result_ty == operand_ty) {
+    return {args[0]};
+  }
+
+  if (auto r = llvm::dyn_cast<ShapedType>(result_ty)) {
+    if (auto o = llvm::dyn_cast<ShapedType>(operand_ty)) {
+      if (verifyCompatibleShapes({o, r}).succeeded()) {
+        return {DispatchScalarType(r, [&](auto dummy) -> InterpreterValue {
+          TensorOrMemref<decltype(dummy)> result;
+          result.view = args[0].View();
+          result.buffer = args[0].Buffer();
+          return {result};
+        })};
+      }
+    }
+  }
+
+  llvm::errs() << "Unimplemented cast: " << *op << "\n";
+  llvm_unreachable("unimplemented cast");
+}
+
+REGISTER_MLIR_INTERPRETER_OP("builtin.unrealized_conversion_cast",
+                             UnrealizedConversionCast);
+
+} // namespace
+} // namespace interpreter
+} // namespace mlir
diff --git a/third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/comparators.h b/third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/comparators.h
new file mode 100644
index 00000000000000..c26b75af6ae660
--- /dev/null
+++ b/third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/comparators.h
@@ -0,0 +1,105 @@
+/* Copyright 2022 The OpenXLA Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef XLA_MLIR_TOOLS_MLIR_INTERPRETER_DIALECTS_COMPARATORS_H_
+#define XLA_MLIR_TOOLS_MLIR_INTERPRETER_DIALECTS_COMPARATORS_H_
+
+#include <cmath>
+#include <complex>
+#include <type_traits>
+
+#include "llvm/Support/ErrorHandling.h"
+#include "xla/mlir/tools/mlir_interpreter/framework/interpreter_value_util.h"
+
+namespace mlir {
+namespace interpreter {
+
+// Despite the name, this works on integers and complex too.
+template <int v, bool r, bool nan_result>
+struct FloatCompare : CwiseAll {
+  template <typename T>
+  static bool Apply(T a, T b) {
+    if (isnan(a) || isnan(b)) return nan_result;
+    if constexpr (v == 0) {
+      // For complex eq/ne.
+      return (a == b) == r;
+    } else if constexpr (std::is_floating_point_v<T> ||
+                         std::is_integral_v<T>) {
+      auto cmp = a > b ? 1 : (a < b ? -1 : 0);
+      return (cmp == v) == r;
+    } else {
+      llvm_unreachable("operation not supported for this type");
+    }
+  }
+
+  template <typename T>
+  static bool isnan(T a) {
+    return std::isnan(a);
+  }
+  template <typename T>
+  static bool isnan(std::complex<T> a) {
+    return std::isnan(std::real(a)) || std::isnan(std::imag(a));
+  }
+};
+
+using Foeq = FloatCompare<0, true, false>;
+using Foge = FloatCompare<-1, false, false>;
+using Fogt = FloatCompare<1, true, false>;
+using Fole = FloatCompare<1, false, false>;
+using Folt = FloatCompare<-1, true, false>;
+using Fone = FloatCompare<0, false, false>;
+using Ford = FloatCompare<99, false, false>;
+using Fueq = FloatCompare<0, true, true>;
+using Fuge = FloatCompare<-1, false, true>;
+using Fugt = FloatCompare<1, true, true>;
+using Fule = FloatCompare<1, false, true>;
+using Fult = FloatCompare<-1, true, true>;
+using Fune = FloatCompare<0, false, true>;
+using Funo = FloatCompare<99, true, true>;
+
+template <int v, bool r>
+struct UnsignedCompare : CwiseInt {
+  template <typename T>
+  static bool Apply(T a, T b) {
+    using U = std::make_unsigned_t<T>;
+    auto a_u = static_cast<U>(a);
+    auto b_u = static_cast<U>(b);
+    auto cmp = a_u > b_u ? 1 : (a_u < b_u ? -1 : 0);
+    return (cmp == v) == r;
+  }
+};
+
+using Iuge = UnsignedCompare<-1, false>;
+using Iule = UnsignedCompare<1, false>;
+using Iugt = UnsignedCompare<1, true>;
+using Iult = UnsignedCompare<-1, true>;
+
+struct Iumax {
+  template <typename T>
+  static T apply(T a, T b) {
+    return Iuge::Apply(a, b) ? a : b;
+  }
+};
+
+struct Iumin {
+  template <typename T>
+  static T apply(T a, T b) {
+    return Iule::Apply(a, b) ? a : b;
+  }
+};
+
+} // namespace interpreter
+} // namespace mlir
+
+#endif  // XLA_MLIR_TOOLS_MLIR_INTERPRETER_DIALECTS_COMPARATORS_H_
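(Reading the template arguments back out: FloatCompare<v, r, nan_result> computes cmp in {-1, 0, 1} and returns (cmp == v) == r, short-circuiting to nan_result when either side is NaN. Illustrative instances:

    // Folt = FloatCompare<-1, true, false>: ordered   a < b, NaN -> false
    // Fult = FloatCompare<-1, true, true>:  unordered a < b, NaN -> true
    // Ford = FloatCompare<99, false, false>: cmp never equals 99, so it is
    //        true iff neither operand is NaN, i.e. the operands are ordered

This matches the O*/U* predicate split consumed by CmpF in arith.cc above.)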
diff --git a/third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/complex.cc b/third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/complex.cc
new file mode 100644
index 00000000000000..35ab806eef8ca3
--- /dev/null
+++ b/third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/complex.cc
@@ -0,0 +1,68 @@
+/* Copyright 2023 The OpenXLA Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "mlir/Dialect/Complex/IR/Complex.h"  // from @llvm-project
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "mlir/IR/BuiltinAttributes.h"  // from @llvm-project
+#include "xla/mlir/tools/mlir_interpreter/dialects/cwise_math.h"
+#include "xla/mlir/tools/mlir_interpreter/framework/interpreter.h"
+#include "xla/mlir/tools/mlir_interpreter/framework/interpreter_value.h"
+#include "xla/mlir/tools/mlir_interpreter/framework/interpreter_value_util.h"
+#include "xla/mlir/tools/mlir_interpreter/framework/registration.h"
+
+namespace mlir {
+namespace interpreter {
+namespace {
+
+InterpreterValue Constant(InterpreterState&, complex::ConstantOp constant) {
+  auto ty = constant->getResultTypes()[0];
+  return DispatchScalarType(ty, [&](auto dummy) -> InterpreterValue {
+    if constexpr (is_complex_v<decltype(dummy)>) {
+      using T = typename decltype(dummy)::value_type;
+      auto values =
+          llvm::to_vector(constant.getValue().getAsValueRange<FloatAttr>());
+      return {decltype(dummy){static_cast<T>(values[0].convertToDouble()),
+                              static_cast<T>(values[1].convertToDouble())}};
+    } else {
+      llvm_unreachable("invalid constant");
+    }
+  });
+}
+
+REGISTER_MLIR_INTERPRETER_OP("complex.abs", "math.absf");
+REGISTER_MLIR_INTERPRETER_OP("complex.add", "arith.addf");
+REGISTER_MLIR_INTERPRETER_OP("complex.cos", ApplyCwiseMap<Cos>);
+REGISTER_MLIR_INTERPRETER_OP("complex.create", ApplyCwiseBinaryMap<Complex>);
+REGISTER_MLIR_INTERPRETER_OP("complex.div", ApplyCwiseBinaryMap<Divide>);
+REGISTER_MLIR_INTERPRETER_OP("complex.exp", ApplyCwiseMap<Exp>);
+REGISTER_MLIR_INTERPRETER_OP("complex.expm1", ApplyCwiseMap<ExpM1>);
+REGISTER_MLIR_INTERPRETER_OP("complex.im", ApplyCwiseMap<Imag>);
+REGISTER_MLIR_INTERPRETER_OP("complex.log", ApplyCwiseMap<Log>);
+REGISTER_MLIR_INTERPRETER_OP("complex.log1p", ApplyCwiseMap<Log1P>);
+REGISTER_MLIR_INTERPRETER_OP("complex.mul", ApplyCwiseBinaryMap<Multiply>);
+REGISTER_MLIR_INTERPRETER_OP("complex.neg", ApplyCwiseMap<Neg>);
+REGISTER_MLIR_INTERPRETER_OP("complex.pow", ApplyCwiseBinaryMap<Power>);
+REGISTER_MLIR_INTERPRETER_OP("complex.re", ApplyCwiseMap<Real>);
+REGISTER_MLIR_INTERPRETER_OP("complex.rsqrt", ApplyCwiseMap<RSqrt>);
+REGISTER_MLIR_INTERPRETER_OP("complex.sin", ApplyCwiseMap<Sin>);
+REGISTER_MLIR_INTERPRETER_OP("complex.sqrt", ApplyCwiseMap<Sqrt>);
+REGISTER_MLIR_INTERPRETER_OP("complex.tanh", ApplyCwiseMap<TanH>);
+REGISTER_MLIR_INTERPRETER_OP(Constant);
+
+} // namespace
+} // namespace interpreter
+} // namespace mlir
diff --git a/third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/cwise_math.h b/third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/cwise_math.h
new file mode 100644
index 00000000000000..6f36f8a65a8142
--- /dev/null
+++ b/third_party/xla/xla/mlir/tools/mlir_interpreter/dialects/cwise_math.h
@@ -0,0 +1,242 @@
+/* Copyright 2023 The OpenXLA Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef XLA_MLIR_TOOLS_MLIR_INTERPRETER_DIALECTS_CWISE_MATH_H_
+#define XLA_MLIR_TOOLS_MLIR_INTERPRETER_DIALECTS_CWISE_MATH_H_
+
+#include <algorithm>
+#include <climits>
+#include <cmath>
+#include <complex>
+#include <type_traits>
+
+#include "xla/mlir/tools/mlir_interpreter/framework/interpreter_value_util.h"
+
+namespace mlir {
+namespace interpreter {
+
+struct ATan2 : CwiseReal {
+  template <typename T>
+  static T Apply(T a, T b) {
+    return std::atan2(a, b);
+  }
+};
+
+struct Clz : CwiseInt {
+  template <typename T>
+  static T Apply(T a) {
+    if (!a) {
+      // Return something well-defined for zeroes.
+      return sizeof(T{}) * CHAR_BIT;
+    }
+    return __builtin_clzl(static_cast<uint64_t>(
+               static_cast<std::make_unsigned_t<T>>(a))) -
+           (sizeof(uint64_t) - sizeof(T{})) * CHAR_BIT;
+  }
+};
+
+struct Ctz : CwiseInt {
+  template <typename T>
+  static T Apply(T a) {
+    if (!a) {
+      // Return something well-defined for zeroes.
+      return sizeof(T{}) * CHAR_BIT;
+    }
+    return __builtin_ctzl(static_cast<uint64_t>(a));
+  }
+};
+
+struct Complex : CwiseFloat {
+  template <typename T>
+  static std::complex<T> Apply(T a, T b) {
+    return {a, b};
+  }
+};
+
+struct Max : CwiseReal {
+  template <typename T>
+  static T Apply(T a, T b) {
+    return std::max(a, b);
+  }
+};
+
+struct Min : CwiseReal {
+  template <typename T>
+  static T Apply(T a, T b) {
+    return std::min(a, b);
+  }
+};
+
+struct Power : CwiseArith {
+  template <typename T>
+  static T Apply(T a, T b) {
+    if constexpr (std::is_integral_v<T>) {
+      if constexpr (std::is_signed_v<T>) {
+        if (b < 0) {
+          return a == 1 ? 1 : 0;
+        }
+      }
+      T result = 1;
+      while (b > 0) {
+        if (b & 1) result *= a;
+        b >>= 1;
+        if (b) {
+          a *= a;
+        }
+      }
+      return result;
+    } else {
+      return std::pow(a, b);
+    }
+  }
+};
+
+struct Remainder : CwiseReal {
+  template <typename T>
+  static T Apply(T a, T b) {
+    if constexpr (std::is_integral_v<T>) {
+      return a % b;
+    } else {
+      return std::fmod(a, b);
+    }
+  }
+};
+
+struct ShiftRightArith : CwiseInt {
+  template <typename T>
+  static T Apply(T a, T b) {
+    return b >= sizeof(T) * CHAR_BIT ? 0 : (a >> b);
+  }
+};
+
+struct ShiftRightLogical : CwiseInt {
+  template <typename T>
+  static T Apply(T a, T b) {
+    return b >= sizeof(T) * CHAR_BIT
+               ? 0
+               : static_cast<std::make_unsigned_t<T>>(a) >> b;
+  }
+};
+
+struct ShiftLeft : CwiseInt {
+  template <typename T>
+  static T Apply(T a, T b) {
+    return b >= sizeof(T) * CHAR_BIT ? 0 : (a << b);
+  }
+};
+
+namespace detail {
+template