From 19c91c98ba8b07b4ff98fb10bacc753638d22476 Mon Sep 17 00:00:00 2001 From: Thiago Crepaldi Date: Mon, 25 Jul 2022 19:34:24 -0400 Subject: [PATCH 01/30] Initial comimt for col2im cpu kernel --- .../contrib_ops/cpu/cpu_contrib_kernels.cc | 2 + .../core/graph/contrib_ops/contrib_defs.cc | 186 ++++++++++++++++++ onnxruntime/core/graph/contrib_ops/ms_opset.h | 2 + .../core/providers/cpu/tensor/col2im.cc | 31 +++ .../core/providers/cpu/tensor/col2im.h | 63 ++++++ .../tools/pytorch_export_contrib_ops.py | 3 + .../python/contrib_ops/onnx_test_col2im.py | 55 ++++++ .../kernel_def_hashes/contrib.cpu.json | 4 + 8 files changed, 346 insertions(+) create mode 100644 onnxruntime/core/providers/cpu/tensor/col2im.cc create mode 100644 onnxruntime/core/providers/cpu/tensor/col2im.h create mode 100644 onnxruntime/test/python/contrib_ops/onnx_test_col2im.py diff --git a/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc b/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc index 0de091a9a4a0f..ed02d793f22d5 100644 --- a/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc +++ b/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc @@ -10,6 +10,7 @@ namespace contrib { class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, SampleOp); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, Col2Im); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, GridSample); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, Attention); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, BeamSearch); @@ -187,6 +188,7 @@ Status RegisterCpuContribKernels(KernelRegistry& kernel_registry) { BuildKernelCreateInfo, // add more kernels here + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, diff --git a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc index 921d44716f12b..6c0a2389de1f4 100644 --- a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc +++ b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc @@ -904,6 +904,192 @@ ONNX_MS_OPERATOR_SET_SCHEMA(IsAllFinite, 1, updateOutputElemType(ctx, 0, ONNX_NAMESPACE::TensorProto::BOOL); })); +void col2imShapeInference(InferenceContext& ctx) { + propagateElemTypeFromInputToOutput(ctx, 0, 0); + + // All inputs shapes are required + if (!hasNInputShapes(ctx, 3)) { + return; + } + + // TODO: Assume image_shape has correct spatial dimensions for next validations + // An alternative is get the the number of spatial dimensions as an input + if (ctx.getInputType(1)->tensor_type().shape().dim_size() != 1) { + fail_shape_inference("image_shape tensor must have rank 1."); + } + size_t n_input_dims = ctx.getInputType(1)->tensor_type().shape().dim(0).dim_value(); + std::vector image_shape = {}; + const TensorProto* image_shape_data = ctx.getInputData(1); + if (image_shape_data) { + image_shape = ParseData(image_shape_data); + if (image_shape.size() != n_input_dims) { + fail_shape_inference("image_shape tensor must have ", n_input_dims, " spatial dimensions."); + } + } + + std::vector pads = {}; + if (getRepeatedAttribute(ctx, "pads", pads)) { + if ((pads.size() != 0) && (pads.size() != n_input_dims * 2)) { + fail_shape_inference("Attribute pads has incorrect size"); + } + } + + std::vector dilations = {}; + if (getRepeatedAttribute(ctx, "dilations", dilations)) { + if ((dilations.size() != 0) && (dilations.size() != n_input_dims)) { 
+ fail_shape_inference("Attribute dilations has incorrect size"); + } + } + + std::vector strides = {}; + if (getRepeatedAttribute(ctx, "strides", strides)) { + if ((strides.size() != 0) && (strides.size() != n_input_dims)) { + fail_shape_inference("Attribute strides has incorrect size"); + } + } + + auto input_shape = ctx.getInputType(0)->tensor_type().shape(); + if (input_shape.dim_size() != 3) { + fail_shape_inference("input must have rank 3."); + } + + std::vector block_shape = {}; + const TensorProto* block_shape_data = ctx.getInputData(2); + if (block_shape_data) { + block_shape = ParseData(block_shape_data); + if (block_shape.size() != n_input_dims) { + fail_shape_inference("block_shape tensor must have ", n_input_dims, " spatial dimensions."); + } + } + if (ctx.getInputType(2)->tensor_type().shape().dim_size() != 1) { + fail_shape_inference("block_shape tensor must have rank 1."); + } else if ( + (ctx.getInputType(2)->tensor_type().shape().dim(0).has_dim_value()) && + (ctx.getInputType(2)->tensor_type().shape().dim(0).dim_value() != static_cast(n_input_dims))) { + fail_shape_inference("block_shape tensor must have ", n_input_dims, " spatial dimensions."); + } + + int block_shape_size = 0; + if (static_cast(block_shape.size()) > 0) { + block_shape_size = 1; + for (const auto& dim : block_shape) { + block_shape_size *= dim; + } + } + + // Final shape will be (N, C, dim_1, ..., dim_N) + auto final_image_shape = ctx.getOutputType(0)->mutable_tensor_type()->mutable_shape(); + + // Dimensions N and C are always present + Dim N, C; + if (ctx.getInputType(0)->tensor_type().shape().dim(0).has_dim_value()) { + N = input_shape.dim(0); // Otherwise, N is unknown. + } + *final_image_shape->add_dim() = N; + + if (block_shape_size > 0) { + C = input_shape.dim(1) / block_shape_size; // Otherwise, C is unknown. + } + *final_image_shape->add_dim() = C; + + // Image dimensions are dynamic + for (size_t i = 0; i < n_input_dims; ++i) { + Dim image_dim_i; + if (image_shape.size() > 0) { + image_dim_i.set_dim_value(image_shape[i]); // Otherwise, spatial dimensions are unknown + } + *final_image_shape->add_dim() = image_dim_i; + } + return; +} + +constexpr const char* Col2Im_ver1_doc = R"DOC( +The operator rearranges column blocks back into a multidimensional image + +Col2Im behaves similarly to PyTorch's fold https://pytorch.org/docs/stable/generated/torch.nn.Fold.html, +but it only supports *batched* multi-dimensional image tensors. + +NOTE: Although specifying image_shape looks redundant because it could be calculated from + convolution formulas, it is required as input for more advanced scenarios as explained + at PyTorch's implementation (https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/native/Col2Im.cpp#L10) + +)DOC"; + +ONNX_MS_OPERATOR_SET_SCHEMA(Col2Im, 1, + OpSchema() + .SetDoc(Col2Im_ver1_doc) + .Attr( + "dilations", + "1-dimensional tensor with dilation value along each spatial axis of the image. " + "If not present, the dilation defaults to 1 along each spatial axis of the image.", + AttributeProto::INTS, + OPTIONAL_VALUE) + .Attr( + "pads", + "1-dimensional tensor with padding value for the beginning and ending along each spatial axis, " + "it can take any value greater than or equal to 0. " + "The value represent the number of pixels added to the beginning " + "and end part of the corresponding axis. 
`pads` format should be as follow " + "[x1_begin, x2_begin...x1_end, x2_end,...], where xi_begin is the number of pixels " + "added at the beginning of axis `i` and xi_end is the number of pixels added at the end of axis `i`. " + "If not present, the padding defaults to 0 along start and end of each spatial axis.", + AttributeProto::INTS, + OPTIONAL_VALUE) + .Attr( + "strides", + "1-dimensional tensor with stride value along each spatial axis. " + "If not present, the stride defaults to 1 along each spatial axis.", + AttributeProto::INTS, + OPTIONAL_VALUE) + .Input( + 0, + "input", + "Input data tensor to be rearranged from column blocks back into an image." + " This is a 3-dimensional tensor containing [N, C * n-ary-product(block_shape), L]," + " where N is batch dimension, C is image channel dimension and L is number of blocks.", + "T", + OpSchema::Single, + true, + 1, + OpSchema::Differentiable) + .Input( + 1, + "image_shape", + "The shape of the spatial dimensions of the image after rearranging the column blocks." + "This is a 1-dimensional tensor with size of at least 2, containing the value [H_img, W_img] " + " for a 2-D image or [dim_i1, dim_i2, ..., dim_iN] for a N-D image.", + "tensor(int64)", + OpSchema::Single, + true, + 1, + OpSchema::NonDifferentiable) + .Input( + 2, + "block_shape", + "The shape of the block to apply on the input." + "This is a 1-dimensional tensor of size of at least 2, containing the value [H_block, W_block] " + " for a 2-D image or [dim_b1, dim_b2, ..., dim_bN] for a N-D block.", + "tensor(int64)", + OpSchema::Single, + true, + 1, + OpSchema::NonDifferentiable) + .Output( + 0, + "output", + "Output tensor produced by rearranging blocks into an image.", + "T", + OpSchema::Single, + true, + 1, + OpSchema::Differentiable) + .TypeConstraint( + "T", + OpSchema::all_tensor_types_with_bfloat(), + "Constrain input and output types to all numeric tensor types.") + .TypeAndShapeInferenceFunction([](InferenceContext& ctx) { col2imShapeInference(ctx); }) + ); + constexpr const char* GridSample_ver1_doc = R"DOC( Given an `input` and a flow-field `grid`, computes the `output` using `input` values and pixel locations from `grid`. Currently, only spatial (4-D) inputs are supported. 
For `input` with shape (N, C, H, W) and `grid` with shape (N, H_out, W_out, 2), diff --git a/onnxruntime/core/graph/contrib_ops/ms_opset.h b/onnxruntime/core/graph/contrib_ops/ms_opset.h index c6850eb8e3516..e3a774a7e9d05 100644 --- a/onnxruntime/core/graph/contrib_ops/ms_opset.h +++ b/onnxruntime/core/graph/contrib_ops/ms_opset.h @@ -56,6 +56,7 @@ class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, FusedMatMul); class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, GatherND); class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, Gelu); class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, GreedySearch); +class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, Col2Im); class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, GridSample); class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, Inverse); class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, Irfft); @@ -125,6 +126,7 @@ class OpSet_Microsoft_ver1 { fn(GetOpSchema()); fn(GetOpSchema()); fn(GetOpSchema()); + fn(GetOpSchema()); fn(GetOpSchema()); fn(GetOpSchema()); fn(GetOpSchema()); diff --git a/onnxruntime/core/providers/cpu/tensor/col2im.cc b/onnxruntime/core/providers/cpu/tensor/col2im.cc new file mode 100644 index 0000000000000..4a465d4f99826 --- /dev/null +++ b/onnxruntime/core/providers/cpu/tensor/col2im.cc @@ -0,0 +1,31 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "core/providers/cpu/tensor/col2im.h" + +#include "core/framework/element_type_lists.h" +#include "core/framework/TensorSeq.h" +#include "core/providers/common.h" +#include "core/framework/copy.h" +#include "core/providers/op_kernel_type_control.h" + +namespace onnxruntime { + +#define REGISTER_KERNEL_TYPED(T) \ + ONNX_CPU_OPERATOR_TYPED_KERNEL( \ + Col2Im, \ + 1, \ + T, \ + KernelDefBuilder() \ + .TypeConstraint("T1", DataTypeImpl::GetTensorType()) \ + .TypeConstraint("T2", DataTypeImpl::GetTensorType()), \ + Col2Im); + +REGISTER_KERNEL_TYPED(float) + +template +Status Col2Im::Compute(OpKernelContext* context) const { + return Status::OK(); +} + +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/cpu/tensor/col2im.h b/onnxruntime/core/providers/cpu/tensor/col2im.h new file mode 100644 index 0000000000000..35afed4c5ed05 --- /dev/null +++ b/onnxruntime/core/providers/cpu/tensor/col2im.h @@ -0,0 +1,63 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
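+//
+// Declares the CPU kernel for the com.microsoft Col2Im contrib op, which
+// rearranges column blocks [N, C * n-ary-product(block_shape), L] back into a
+// batched image tensor [N, C, dim_1, ..., dim_N] (see the schema added in
+// contrib_defs.cc). Compute() is implemented in col2im.cc; the commented-out
+// members below are scaffolding carried over from the GridSample kernel and
+// are not used yet.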
+ +#pragma once + +#include "core/common/common.h" +#include "core/framework/op_kernel.h" +#include "core/util/math_cpuonly.h" +#include "core/framework/tensor.h" +#include "concatbase.h" + +namespace onnxruntime { + +template +class Col2Im final : public OpKernel { + public: + explicit Col2Im(const OpKernelInfo& info) : OpKernel(info) { + // std::string mode_str = info.GetAttrOrDefault("mode", "bilinear"); + // std::string padding_mode_str = info.GetAttrOrDefault("padding_mode", "zeros"); + // align_corners_ = static_cast(info.GetAttrOrDefault("align_corners", 0)); + // ORT_ENFORCE(mode_str == "bilinear" || mode_str == "nearest" || mode_str == "bicubic", + // "mode \"", mode_str, "\" not supported, expect bilinear, nearest or bicubic"); + // ORT_ENFORCE(padding_mode_str == "zeros" || padding_mode_str == "border" || padding_mode_str == "reflection", + // "padding_mode \"", padding_mode_str, "\" not supported, expect zeros, border or reflection"); + // if (mode_str == "bicubic") { + // mode_ = Bicubic; + // } else if (mode_str == "nearest") { + // mode_ = Nearest; + // } else { + // mode_ = Bilinear; + // } + // if (padding_mode_str == "reflection") { + // padding_mode_ = Reflection; + // } else if (padding_mode_str == "border") { + // padding_mode_ = Border; + // } else { + // padding_mode_ = Zeros; + // } + } + + Status Compute(OpKernelContext* context) const override; + + private: + // enum GridSampleInterpolationMode { + // Bilinear, + // Nearest, + // Bicubic + // }; + + // enum GridSamplePaddingMode { + // Zeros, + // Border, + // Reflection + // }; + + // T PixelAtGrid(const T* image, int64_t r, int64_t c, int64_t H, int64_t W, float border[/* 4 */]) const; + + // GridSampleInterpolationMode mode_{Bilinear}; + // GridSamplePaddingMode padding_mode_{Zeros}; + // bool align_corners_{0}; +}; + +} // namespace onnxruntime diff --git a/onnxruntime/python/tools/pytorch_export_contrib_ops.py b/onnxruntime/python/tools/pytorch_export_contrib_ops.py index aaca3806605a9..6d11f6ebeb6ae 100644 --- a/onnxruntime/python/tools/pytorch_export_contrib_ops.py +++ b/onnxruntime/python/tools/pytorch_export_contrib_ops.py @@ -91,6 +91,9 @@ def tril(g, self, diagonal): _reg(tril) + def col2im(g, self: torch._C.Value, image_shape, block_shape): + return g.op("com.microsoft::Col2Im", self, image_shape, block_shape) + def unregister(): """Unregister ONNX Runtime's built-in contrib ops.""" diff --git a/onnxruntime/test/python/contrib_ops/onnx_test_col2im.py b/onnxruntime/test/python/contrib_ops/onnx_test_col2im.py new file mode 100644 index 0000000000000..97269d895a125 --- /dev/null +++ b/onnxruntime/test/python/contrib_ops/onnx_test_col2im.py @@ -0,0 +1,55 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+# +# Test reference implementation and model for ONNX Runtime conrtib op trilu + +import unittest + +import numpy as np +import onnx +from onnx_contrib_ops_helper import expect + + +class ONNXReferenceImplementationTest(unittest.TestCase): + def test_col2im(self) -> None: + input = np.array( + [ + [ + [1.0, 6.0, 11.0, 16.0, 21.0], # (1, 5, 5) + [2.0, 7.0, 12.0, 17.0, 22.0], + [3.0, 8.0, 13.0, 18.0, 23.0], + [4.0, 9.0, 14.0, 19.0, 24.0], + [5.0, 0.0, 15.0, 20.0, 25.0], + ] + ] + ).astype(np.float32) + image_shape = np.array([5, 5]).astype(np.int64) + block_shape = np.array([1, 5]).astype(np.int64) + node = onnx.helper.make_node( + "Col2Im", ["input", "image_shape", "block_shape"], ["col2im_reference_implementation"] + ) + + col2im_reference_implementation = np.array( + [ + [ + [ + [1.0, 2.0, 3.0, 4.0, 5.0], # (1, 1, 5, 5) + [6.0, 7.0, 8.0, 9.0, 0.0], + [11.0, 12.0, 13.0, 14.0, 15.0], + [16.0, 17.0, 18.0, 19.0, 20.0], + [21.0, 22.0, 23.0, 24.0, 25.0], + ] + ] + ] + ).astype(np.float32) + + expect( + node, + inputs=[input, image_shape, block_shape], + outputs=[col2im_reference_implementation], + name="test_col2im", + ) + + +if __name__ == "__main__": + unittest.main(module=__name__, buffer=True) diff --git a/onnxruntime/test/testdata/kernel_def_hashes/contrib.cpu.json b/onnxruntime/test/testdata/kernel_def_hashes/contrib.cpu.json index 5fb55faa14c5c..1babf97a23f73 100644 --- a/onnxruntime/test/testdata/kernel_def_hashes/contrib.cpu.json +++ b/onnxruntime/test/testdata/kernel_def_hashes/contrib.cpu.json @@ -147,6 +147,10 @@ "Gelu com.microsoft CPUExecutionProvider", 4658746266161736328 ], + [ + "Col2Im com.microsoft CPUExecutionProvider", + 11924582339825775592 + ], [ "GridSample com.microsoft CPUExecutionProvider", 11924582339825775592 From 630604249395fb16f843a63224674f8d442b400c Mon Sep 17 00:00:00 2001 From: Thiago Crepaldi Date: Tue, 26 Jul 2022 12:22:56 -0400 Subject: [PATCH 02/30] Add missing op declaration --- onnxruntime/contrib_ops/cpu/col2im.cc | 22 +++++++++++++++++++ .../core/providers/cpu/tensor/col2im.cc | 14 +++++------- 2 files changed, 28 insertions(+), 8 deletions(-) create mode 100644 onnxruntime/contrib_ops/cpu/col2im.cc diff --git a/onnxruntime/contrib_ops/cpu/col2im.cc b/onnxruntime/contrib_ops/cpu/col2im.cc new file mode 100644 index 0000000000000..50689ccb6b4ab --- /dev/null +++ b/onnxruntime/contrib_ops/cpu/col2im.cc @@ -0,0 +1,22 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
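+//
+// Registers the typed Col2Im contrib kernel (com.microsoft domain, opset 1)
+// with the CPU execution provider; the kernel class itself is declared in
+// core/providers/cpu/tensor/col2im.h and its Compute() lives in
+// core/providers/cpu/tensor/col2im.cc.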
+#include "core/providers/cpu/tensor/col2im.h" +#include "core/providers/common.h" + +namespace onnxruntime { +namespace contrib { + +#define REGISTER_KERNEL_TYPED(T) \ + ONNX_OPERATOR_TYPED_KERNEL_EX( \ + Col2Im, \ + kMSDomain, \ + 1, \ + T, \ + kCpuExecutionProvider, \ + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::AllTensorTypes()), \ + Col2Im); + +REGISTER_KERNEL_TYPED(float) + +} // namespace contrib +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/cpu/tensor/col2im.cc b/onnxruntime/core/providers/cpu/tensor/col2im.cc index 4a465d4f99826..f2893cb62c854 100644 --- a/onnxruntime/core/providers/cpu/tensor/col2im.cc +++ b/onnxruntime/core/providers/cpu/tensor/col2im.cc @@ -11,14 +11,12 @@ namespace onnxruntime { -#define REGISTER_KERNEL_TYPED(T) \ - ONNX_CPU_OPERATOR_TYPED_KERNEL( \ - Col2Im, \ - 1, \ - T, \ - KernelDefBuilder() \ - .TypeConstraint("T1", DataTypeImpl::GetTensorType()) \ - .TypeConstraint("T2", DataTypeImpl::GetTensorType()), \ +#define REGISTER_KERNEL_TYPED(T) \ + ONNX_CPU_OPERATOR_TYPED_KERNEL( \ + Col2Im, \ + 1, \ + T, \ + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::AllTensorTypes()), \ Col2Im); REGISTER_KERNEL_TYPED(float) From 24f95da558a01a0b7fd793f796a6e12afa6633f0 Mon Sep 17 00:00:00 2001 From: Thiago Crepaldi Date: Tue, 26 Jul 2022 13:34:45 -0400 Subject: [PATCH 03/30] Fix hash --- .../test_col2im/test_data_set_0/input_0.pb | Bin 0 -> 117 bytes .../test_col2im/test_data_set_0/input_1.pb | Bin 0 -> 35 bytes .../test_col2im/test_data_set_0/input_2.pb | Bin 0 -> 35 bytes .../test_col2im/test_data_set_0/output_0.pb | Bin 0 -> 145 bytes .../testdata/kernel_def_hashes/contrib.cpu.json | 2 +- 5 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 onnxruntime/test/python/testdata/test_col2im/test_data_set_0/input_0.pb create mode 100644 onnxruntime/test/python/testdata/test_col2im/test_data_set_0/input_1.pb create mode 100644 onnxruntime/test/python/testdata/test_col2im/test_data_set_0/input_2.pb create mode 100644 onnxruntime/test/python/testdata/test_col2im/test_data_set_0/output_0.pb diff --git a/onnxruntime/test/python/testdata/test_col2im/test_data_set_0/input_0.pb b/onnxruntime/test/python/testdata/test_col2im/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..164166b2c84e8c0968a316c70ceb85e9b5fea07e GIT binary patch literal 117 zcmd;J@x2I3Qr0LGRS@&Et; literal 0 HcmV?d00001 diff --git a/onnxruntime/test/python/testdata/test_col2im/test_data_set_0/input_1.pb b/onnxruntime/test/python/testdata/test_col2im/test_data_set_0/input_1.pb new file mode 100644 index 0000000000000000000000000000000000000000..e2e47c174ce48b0b6cc775ccbad84426c3925a39 GIT binary patch literal 35 fcmd;J5@2`Y&dg0rPmM3mNGwS85@2P302mDbe=`L} literal 0 HcmV?d00001 diff --git a/onnxruntime/test/python/testdata/test_col2im/test_data_set_0/input_2.pb b/onnxruntime/test/python/testdata/test_col2im/test_data_set_0/input_2.pb new file mode 100644 index 0000000000000000000000000000000000000000..c0b7595628c4bb8bd1859c490f6242ca6bdbf7cc GIT binary patch literal 35 gcmd;J5@2`YPRhwo&WST6h+bR0?8#CWCg?mft0DT0YT(PvV;vn4B(QIG9@J?WlGA7!6@83-J@rX zH8!zHYHpXNYjfwTLEk!PXtZ*^Z`ErU>(O=L{t}5-5h(}}3w9ikQY6KH8DHRmCwfFo OnDN1a6&t?T@heYoXdiw6 literal 0 HcmV?d00001 diff --git a/onnxruntime/test/testdata/kernel_def_hashes/contrib.cpu.json b/onnxruntime/test/testdata/kernel_def_hashes/contrib.cpu.json index 1babf97a23f73..181e69b61090f 100644 --- a/onnxruntime/test/testdata/kernel_def_hashes/contrib.cpu.json +++ 
b/onnxruntime/test/testdata/kernel_def_hashes/contrib.cpu.json @@ -149,7 +149,7 @@ ], [ "Col2Im com.microsoft CPUExecutionProvider", - 11924582339825775592 + 16946735406825550320 ], [ "GridSample com.microsoft CPUExecutionProvider", From 98176b94aaf08b4d831670f2c36ffc6cde1a8111 Mon Sep 17 00:00:00 2001 From: Thiago Crepaldi Date: Tue, 26 Jul 2022 14:39:55 -0400 Subject: [PATCH 04/30] Fix ci --- .../core/graph/contrib_ops/contrib_defs.cc | 17 ++++++++--------- onnxruntime/core/providers/cpu/tensor/col2im.cc | 1 + onnxruntime/core/providers/cpu/tensor/col2im.h | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc index 6c0a2389de1f4..8fae65f390e4b 100644 --- a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc +++ b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc @@ -912,7 +912,7 @@ void col2imShapeInference(InferenceContext& ctx) { return; } - // TODO: Assume image_shape has correct spatial dimensions for next validations + // Assuming image_shape has correct spatial dimensions and reused for next validation steps // An alternative is get the the number of spatial dimensions as an input if (ctx.getInputType(1)->tensor_type().shape().dim_size() != 1) { fail_shape_inference("image_shape tensor must have rank 1."); @@ -969,7 +969,7 @@ void col2imShapeInference(InferenceContext& ctx) { fail_shape_inference("block_shape tensor must have ", n_input_dims, " spatial dimensions."); } - int block_shape_size = 0; + int64_t block_shape_size = 0; if (static_cast(block_shape.size()) > 0) { block_shape_size = 1; for (const auto& dim : block_shape) { @@ -983,12 +983,12 @@ void col2imShapeInference(InferenceContext& ctx) { // Dimensions N and C are always present Dim N, C; if (ctx.getInputType(0)->tensor_type().shape().dim(0).has_dim_value()) { - N = input_shape.dim(0); // Otherwise, N is unknown. + N = input_shape.dim(0); // Otherwise, N is unknown. } *final_image_shape->add_dim() = N; if (block_shape_size > 0) { - C = input_shape.dim(1) / block_shape_size; // Otherwise, C is unknown. + C = input_shape.dim(1) / block_shape_size; // Otherwise, C is unknown. } *final_image_shape->add_dim() = C; @@ -996,7 +996,7 @@ void col2imShapeInference(InferenceContext& ctx) { for (size_t i = 0; i < n_input_dims; ++i) { Dim image_dim_i; if (image_shape.size() > 0) { - image_dim_i.set_dim_value(image_shape[i]); // Otherwise, spatial dimensions are unknown + image_dim_i.set_dim_value(image_shape[i]); // Otherwise, spatial dimensions are unknown } *final_image_shape->add_dim() = image_dim_i; } @@ -1026,8 +1026,8 @@ ONNX_MS_OPERATOR_SET_SCHEMA(Col2Im, 1, OPTIONAL_VALUE) .Attr( "pads", - "1-dimensional tensor with padding value for the beginning and ending along each spatial axis, " - "it can take any value greater than or equal to 0. " + "1-dimensional tensor with padding value for the beginning and ending along each" + " spatial axis, it can take any value greater than or equal to 0. " "The value represent the number of pixels added to the beginning " "and end part of the corresponding axis. 
`pads` format should be as follow " "[x1_begin, x2_begin...x1_end, x2_end,...], where xi_begin is the number of pixels " @@ -1087,8 +1087,7 @@ ONNX_MS_OPERATOR_SET_SCHEMA(Col2Im, 1, "T", OpSchema::all_tensor_types_with_bfloat(), "Constrain input and output types to all numeric tensor types.") - .TypeAndShapeInferenceFunction([](InferenceContext& ctx) { col2imShapeInference(ctx); }) - ); + .TypeAndShapeInferenceFunction([](InferenceContext& ctx) { col2imShapeInference(ctx); })); constexpr const char* GridSample_ver1_doc = R"DOC( Given an `input` and a flow-field `grid`, computes the `output` using `input` values and pixel locations from `grid`. diff --git a/onnxruntime/core/providers/cpu/tensor/col2im.cc b/onnxruntime/core/providers/cpu/tensor/col2im.cc index f2893cb62c854..fa95dd23560a9 100644 --- a/onnxruntime/core/providers/cpu/tensor/col2im.cc +++ b/onnxruntime/core/providers/cpu/tensor/col2im.cc @@ -23,6 +23,7 @@ REGISTER_KERNEL_TYPED(float) template Status Col2Im::Compute(OpKernelContext* context) const { + (void) context; return Status::OK(); } diff --git a/onnxruntime/core/providers/cpu/tensor/col2im.h b/onnxruntime/core/providers/cpu/tensor/col2im.h index 35afed4c5ed05..03cfc3630877c 100644 --- a/onnxruntime/core/providers/cpu/tensor/col2im.h +++ b/onnxruntime/core/providers/cpu/tensor/col2im.h @@ -7,7 +7,7 @@ #include "core/framework/op_kernel.h" #include "core/util/math_cpuonly.h" #include "core/framework/tensor.h" -#include "concatbase.h" +#include "core/providers/cpu/tensor/concatbase.h" namespace onnxruntime { From 5b70c4a9379c6b79622cbe97678305c10dcc50f5 Mon Sep 17 00:00:00 2001 From: Thiago Crepaldi Date: Wed, 27 Jul 2022 17:48:49 -0400 Subject: [PATCH 05/30] Kernel impl --- .../core/providers/cpu/tensor/col2im.cc | 55 ++++++++++++++++++- .../core/providers/cpu/tensor/col2im.h | 43 ++------------- .../providers/cpu/tensor/col2im_attributes.h | 55 +++++++++++++++++++ 3 files changed, 113 insertions(+), 40 deletions(-) create mode 100644 onnxruntime/core/providers/cpu/tensor/col2im_attributes.h diff --git a/onnxruntime/core/providers/cpu/tensor/col2im.cc b/onnxruntime/core/providers/cpu/tensor/col2im.cc index fa95dd23560a9..dd95327e62935 100644 --- a/onnxruntime/core/providers/cpu/tensor/col2im.cc +++ b/onnxruntime/core/providers/cpu/tensor/col2im.cc @@ -7,6 +7,7 @@ #include "core/framework/TensorSeq.h" #include "core/providers/common.h" #include "core/framework/copy.h" +#include "core/common/safeint.h" #include "core/providers/op_kernel_type_control.h" namespace onnxruntime { @@ -23,7 +24,59 @@ REGISTER_KERNEL_TYPED(float) template Status Col2Im::Compute(OpKernelContext* context) const { - (void) context; + const auto* col_input = context->Input(0); + const auto* image_shape = context->Input(1); + const auto* kernel_shape = context->Input(2); + + TensorShape col_shape = col_input->Shape(); + const auto num_image_channels = image_shape->Shape()[1]; + const auto batch_size = col_shape[0]; + + const int64_t image_size = image_shape->Shape().Size(); + + AllocatorPtr alloc; + ORT_RETURN_IF_ERROR(context->GetTempSpaceAllocator(&alloc)); + const int64_t col_buffer_size = col_input->Shape().Size(); + auto col_data = alloc->Alloc(SafeInt(sizeof(T)) * col_buffer_size); + + BufferUniquePtr col_buffer(col_data, BufferDeleter(std::move(alloc))); + T* col_buffer_data = static_cast(col_buffer.get()); + + TensorShapeVector Y_dims; + Y_dims.insert(Y_dims.begin(), {batch_size, num_image_channels}); + TensorShape Yshape(Y_dims); + Tensor* Y = context->Output(0, Yshape); + T* Ydata = Y->template 
MutableData(); + + // template + // void Col2imNd( + // const T* data_col, + // const int64_t* img_shape, + // const int64_t* output_shape, + // int64_t channels_col, + // int64_t img_size, + // const int64_t* kernel_shape, + // const int64_t* stride, + // const int64_t* dilation, + // const int64_t* pad, + // ptrdiff_t N, + // T* data_img, + // Provider* provider); + + math::Col2imNd( + col_buffer_data, + image_shape->Shape().GetDims().data(), + col_shape.GetDims().data(), + num_image_channels, + image_size, + kernel_shape->Shape().GetDims().data(), + col2im_attrs_.strides.data(), + col2im_attrs_.dilations.data(), + col2im_attrs_.pads.data(), + static_cast(kernel_shape->Shape().Size()), + Ydata, + &CPUMathUtil::Instance()); + return Status::OK(); } diff --git a/onnxruntime/core/providers/cpu/tensor/col2im.h b/onnxruntime/core/providers/cpu/tensor/col2im.h index 03cfc3630877c..8cbefd2ec668b 100644 --- a/onnxruntime/core/providers/cpu/tensor/col2im.h +++ b/onnxruntime/core/providers/cpu/tensor/col2im.h @@ -3,6 +3,8 @@ #pragma once +#include "core/providers/cpu/tensor/col2im_attributes.h" + #include "core/common/common.h" #include "core/framework/op_kernel.h" #include "core/util/math_cpuonly.h" @@ -14,50 +16,13 @@ namespace onnxruntime { template class Col2Im final : public OpKernel { public: - explicit Col2Im(const OpKernelInfo& info) : OpKernel(info) { - // std::string mode_str = info.GetAttrOrDefault("mode", "bilinear"); - // std::string padding_mode_str = info.GetAttrOrDefault("padding_mode", "zeros"); - // align_corners_ = static_cast(info.GetAttrOrDefault("align_corners", 0)); - // ORT_ENFORCE(mode_str == "bilinear" || mode_str == "nearest" || mode_str == "bicubic", - // "mode \"", mode_str, "\" not supported, expect bilinear, nearest or bicubic"); - // ORT_ENFORCE(padding_mode_str == "zeros" || padding_mode_str == "border" || padding_mode_str == "reflection", - // "padding_mode \"", padding_mode_str, "\" not supported, expect zeros, border or reflection"); - // if (mode_str == "bicubic") { - // mode_ = Bicubic; - // } else if (mode_str == "nearest") { - // mode_ = Nearest; - // } else { - // mode_ = Bilinear; - // } - // if (padding_mode_str == "reflection") { - // padding_mode_ = Reflection; - // } else if (padding_mode_str == "border") { - // padding_mode_ = Border; - // } else { - // padding_mode_ = Zeros; - // } + explicit Col2Im(const OpKernelInfo& info) : OpKernel(info), col2im_attrs_(info) { } Status Compute(OpKernelContext* context) const override; private: - // enum GridSampleInterpolationMode { - // Bilinear, - // Nearest, - // Bicubic - // }; - - // enum GridSamplePaddingMode { - // Zeros, - // Border, - // Reflection - // }; - - // T PixelAtGrid(const T* image, int64_t r, int64_t c, int64_t H, int64_t W, float border[/* 4 */]) const; - - // GridSampleInterpolationMode mode_{Bilinear}; - // GridSamplePaddingMode padding_mode_{Zeros}; - // bool align_corners_{0}; + Col2ImAttributes col2im_attrs_; }; } // namespace onnxruntime diff --git a/onnxruntime/core/providers/cpu/tensor/col2im_attributes.h b/onnxruntime/core/providers/cpu/tensor/col2im_attributes.h new file mode 100644 index 0000000000000..299bd533296f5 --- /dev/null +++ b/onnxruntime/core/providers/cpu/tensor/col2im_attributes.h @@ -0,0 +1,55 @@ +/** +* Copyright (c) 2016-present, Facebook, Inc. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ +/* Modifications Copyright (c) Microsoft. */ + +#pragma once + +#ifndef SHARED_PROVIDER +#include "core/common/common.h" +#include "core/providers/common.h" +#include "core/util/math.h" +#endif + +#include "core/common/inlined_containers.h" +#include "core/framework/op_kernel.h" +#include "core/framework/op_node_proto_helper.h" + +namespace onnxruntime { + +struct Col2ImAttributes { + using Col2ImPadVector = InlinedVector; + + explicit Col2ImAttributes(const OpKernelInfo& info) { + auto status = info.GetAttrs("strides", strides); + ORT_ENFORCE(status.IsOK()); + + gsl::span pads_span; + status = info.GetAttrsAsSpan("pads", pads_span); + ORT_ENFORCE(status.IsOK()); + pads.assign(pads_span.cbegin(), pads_span.cend()); + + status = info.GetAttrs("dilations", dilations); + ORT_ENFORCE(status.IsOK()); + } + + ~Col2ImAttributes() = default; + + Col2ImPadVector pads; + TensorShapeVector dilations; + TensorShapeVector strides; +}; + +} // namespace onnxruntime From 47c085a13760ab980a0505699bf21970b48cdd43 Mon Sep 17 00:00:00 2001 From: Thiago Crepaldi Date: Mon, 1 Aug 2022 20:36:38 -0400 Subject: [PATCH 06/30] Add debug info --- .../core/providers/cpu/tensor/col2im.cc | 83 ++++++++++-------- .../providers/cpu/tensor/col2im_attributes.h | 6 +- onnxruntime/core/util/math_cpu.cc | 83 ++++++++++++++---- onnxruntime/test/contrib_ops/col2im_test.cc | 28 ++++++ .../test_col2im/test_data_set_0/input_0.pb | Bin 0 -> 117 bytes .../test_col2im/test_data_set_0/input_1.pb | Bin 0 -> 35 bytes .../test_col2im/test_data_set_0/input_2.pb | Bin 0 -> 35 bytes .../test_col2im/test_data_set_0/output_0.pb | Bin 0 -> 120 bytes .../test_col2im_5d/test_data_set_0/input_0.pb | Bin 0 -> 498 bytes .../test_col2im_5d/test_data_set_0/input_1.pb | Bin 0 -> 43 bytes .../test_col2im_5d/test_data_set_0/input_2.pb | Bin 0 -> 43 bytes .../test_data_set_0/output_0.pb | Bin 0 -> 503 bytes .../test_data_set_0/input_0.pb | Bin 0 -> 97 bytes .../test_data_set_0/input_1.pb | Bin 0 -> 35 bytes .../test_data_set_0/input_2.pb | Bin 0 -> 35 bytes .../test_data_set_0/output_0.pb | Bin 0 -> 165 bytes .../test_data_set_0/input_0.pb | Bin 0 -> 318 bytes .../test_data_set_0/input_1.pb | Bin 0 -> 35 bytes .../test_data_set_0/input_2.pb | Bin 0 -> 35 bytes .../test_data_set_0/output_0.pb | Bin 0 -> 120 bytes .../test_data_set_0/input_0.pb | Bin 0 -> 162 bytes .../test_data_set_0/input_1.pb | Bin 0 -> 35 bytes .../test_data_set_0/input_2.pb | Bin 0 -> 35 bytes .../test_data_set_0/output_0.pb | Bin 0 -> 120 bytes 24 files changed, 140 insertions(+), 60 deletions(-) create mode 100644 onnxruntime/test/contrib_ops/col2im_test.cc create mode 100644 onnxruntime/test/python/testdata/node/test_col2im/test_data_set_0/input_0.pb create mode 100644 onnxruntime/test/python/testdata/node/test_col2im/test_data_set_0/input_1.pb create mode 100644 onnxruntime/test/python/testdata/node/test_col2im/test_data_set_0/input_2.pb create mode 100644 onnxruntime/test/python/testdata/node/test_col2im/test_data_set_0/output_0.pb create mode 100644 onnxruntime/test/python/testdata/node/test_col2im_5d/test_data_set_0/input_0.pb create mode 100644 
onnxruntime/test/python/testdata/node/test_col2im_5d/test_data_set_0/input_1.pb create mode 100644 onnxruntime/test/python/testdata/node/test_col2im_5d/test_data_set_0/input_2.pb create mode 100644 onnxruntime/test/python/testdata/node/test_col2im_5d/test_data_set_0/output_0.pb create mode 100644 onnxruntime/test/python/testdata/node/test_col2im_dilations/test_data_set_0/input_0.pb create mode 100644 onnxruntime/test/python/testdata/node/test_col2im_dilations/test_data_set_0/input_1.pb create mode 100644 onnxruntime/test/python/testdata/node/test_col2im_dilations/test_data_set_0/input_2.pb create mode 100644 onnxruntime/test/python/testdata/node/test_col2im_dilations/test_data_set_0/output_0.pb create mode 100644 onnxruntime/test/python/testdata/node/test_col2im_pads/test_data_set_0/input_0.pb create mode 100644 onnxruntime/test/python/testdata/node/test_col2im_pads/test_data_set_0/input_1.pb create mode 100644 onnxruntime/test/python/testdata/node/test_col2im_pads/test_data_set_0/input_2.pb create mode 100644 onnxruntime/test/python/testdata/node/test_col2im_pads/test_data_set_0/output_0.pb create mode 100644 onnxruntime/test/python/testdata/node/test_col2im_strides/test_data_set_0/input_0.pb create mode 100644 onnxruntime/test/python/testdata/node/test_col2im_strides/test_data_set_0/input_1.pb create mode 100644 onnxruntime/test/python/testdata/node/test_col2im_strides/test_data_set_0/input_2.pb create mode 100644 onnxruntime/test/python/testdata/node/test_col2im_strides/test_data_set_0/output_0.pb diff --git a/onnxruntime/core/providers/cpu/tensor/col2im.cc b/onnxruntime/core/providers/cpu/tensor/col2im.cc index dd95327e62935..4c3f999766f5e 100644 --- a/onnxruntime/core/providers/cpu/tensor/col2im.cc +++ b/onnxruntime/core/providers/cpu/tensor/col2im.cc @@ -27,55 +27,60 @@ Status Col2Im::Compute(OpKernelContext* context) const { const auto* col_input = context->Input(0); const auto* image_shape = context->Input(1); const auto* kernel_shape = context->Input(2); + std::cout << "Status Col2Im::Compute(OpKernelContext* context)" << std::endl; - TensorShape col_shape = col_input->Shape(); - const auto num_image_channels = image_shape->Shape()[1]; - const auto batch_size = col_shape[0]; + const T* col_input_data = col_input->template Data(); + TensorShape col_input_shape = col_input->Shape(); + int64_t col_input_C = col_input_shape[1]; + const auto col_input_N = col_input_shape[0]; - const int64_t image_size = image_shape->Shape().Size(); - - AllocatorPtr alloc; - ORT_RETURN_IF_ERROR(context->GetTempSpaceAllocator(&alloc)); - const int64_t col_buffer_size = col_input->Shape().Size(); - auto col_data = alloc->Alloc(SafeInt(sizeof(T)) * col_buffer_size); - - BufferUniquePtr col_buffer(col_data, BufferDeleter(std::move(alloc))); - T* col_buffer_data = static_cast(col_buffer.get()); + int64_t image_shape_size = 1; + int64_t kernel_shape_size = 1; + for (auto i=0; i < image_shape->Shape().Size(); ++i) { + image_shape_size *= image_shape->Data()[i]; + kernel_shape_size *= kernel_shape->Data()[i]; + // col_input_C computed as => (C*n-ary-prod{kernel_shape}) / n-ary-prod{kernel_shape} + col_input_C /= kernel_shape->Data()[i]; + } TensorShapeVector Y_dims; - Y_dims.insert(Y_dims.begin(), {batch_size, num_image_channels}); + Y_dims.insert(Y_dims.begin(), {col_input_N, col_input_C}); + for (auto i=0; i < image_shape->Shape()[0]; ++i) { + Y_dims.push_back(image_shape->Data()[i]); + } TensorShape Yshape(Y_dims); Tensor* Y = context->Output(0, Yshape); T* Ydata = Y->template MutableData(); - // template - 
// void Col2imNd( - // const T* data_col, - // const int64_t* img_shape, - // const int64_t* output_shape, - // int64_t channels_col, - // int64_t img_size, - // const int64_t* kernel_shape, - // const int64_t* stride, - // const int64_t* dilation, - // const int64_t* pad, - // ptrdiff_t N, - // T* data_img, - // Provider* provider); + std::cout << "\n\tInput 0: col_input = ("; for (auto i=0; i < Yshape.Size(); ++i) std::cout << col_input_data[i] << ", "; std::cout << ") with shape "<< Yshape << std::endl; + std::cout << "\tInput 1: image_shape = ("; for (auto i=0; i < image_shape->Shape().Size(); ++i) std::cout << image_shape->Data()[i] << ", "; std::cout << ")" << std::endl; + std::cout << "\tInput 2: kernel_shape = ("; for (auto i=0; i < kernel_shape->Shape().Size(); ++i) std::cout << kernel_shape->Data()[i] << ", "; std::cout << ")" << std::endl; + std::cout << "\tAttribute strides = ("; for (size_t i=0; i < col2im_attrs_.strides.size(); ++i) std::cout << col2im_attrs_.strides[i] << ", "; std::cout << ")"<< std::endl; + std::cout << "\tAttribute dilations = ("; for (size_t i=0; i < col2im_attrs_.dilations.size(); ++i) std::cout << col2im_attrs_.dilations[i] << ", "; std::cout << ")"<< std::endl; + std::cout << "\tAttribute pads = ("; for (size_t i=0; i < col2im_attrs_.pads.size(); ++i) std::cout << col2im_attrs_.pads[i] << ", "; std::cout << ")"<< std::endl; + + std::cout << "\tVariable col_input_C: " << col_input_C << std::endl; + std::cout << "\tVariable col_input_N = " << col_input_N << std::endl; + std::cout << "\tVariable image_shape_size: " << image_shape_size << std::endl; + std::cout << "\tVariable kernel_shape_size: " << kernel_shape_size << std::endl; + + std::cout << "\n\tStatus Col2Im::Compute() --> math::Col2imNd<>()" << std::endl; math::Col2imNd( - col_buffer_data, - image_shape->Shape().GetDims().data(), - col_shape.GetDims().data(), - num_image_channels, - image_size, - kernel_shape->Shape().GetDims().data(), - col2im_attrs_.strides.data(), - col2im_attrs_.dilations.data(), - col2im_attrs_.pads.data(), - static_cast(kernel_shape->Shape().Size()), - Ydata, - &CPUMathUtil::Instance()); + col_input_data, // const T* data_col, + image_shape->Data(), // const int64_t* img_shape, + Yshape.Slice(2).GetDims().data(), // const int64_t* output_shape, + col_input_C, // int64_t channels_col, --> output_num_channels * kernel_shape_size + image_shape_size, // int64_t img_size, + kernel_shape->Data(), // const int64_t* kernel_shape, + col2im_attrs_.strides.data(), // const int64_t* stride, + col2im_attrs_.dilations.data(), // const int64_t* dilation, + col2im_attrs_.pads.data(), // const int64_t* pad, + kernel_shape->Shape().Size(), // ptrdiff_t N, --> number of spatial dims for image + Ydata, // T* data_img, + &CPUMathUtil::Instance() // Provider* provider + ); + std::cout << "\n\n Return Col2Im::Compute() --> "; for (auto i=0; i < Yshape.Size(); ++i) std::cout << Ydata[i] << ", "; std::cout << ") with shape " << Yshape << std::endl << std::endl; return Status::OK(); } diff --git a/onnxruntime/core/providers/cpu/tensor/col2im_attributes.h b/onnxruntime/core/providers/cpu/tensor/col2im_attributes.h index 299bd533296f5..9639718db5ecf 100644 --- a/onnxruntime/core/providers/cpu/tensor/col2im_attributes.h +++ b/onnxruntime/core/providers/cpu/tensor/col2im_attributes.h @@ -34,15 +34,15 @@ struct Col2ImAttributes { explicit Col2ImAttributes(const OpKernelInfo& info) { auto status = info.GetAttrs("strides", strides); - ORT_ENFORCE(status.IsOK()); + // ORT_ENFORCE(status.IsOK()); gsl::span 
pads_span; status = info.GetAttrsAsSpan("pads", pads_span); - ORT_ENFORCE(status.IsOK()); + // ORT_ENFORCE(status.IsOK()); pads.assign(pads_span.cbegin(), pads_span.cend()); status = info.GetAttrs("dilations", dilations); - ORT_ENFORCE(status.IsOK()); + // ORT_ENFORCE(status.IsOK()); } ~Col2ImAttributes() = default; diff --git a/onnxruntime/core/util/math_cpu.cc b/onnxruntime/core/util/math_cpu.cc index 164e88573c4cb..05b265715e407 100644 --- a/onnxruntime/core/util/math_cpu.cc +++ b/onnxruntime/core/util/math_cpu.cc @@ -31,6 +31,7 @@ #pragma GCC diagnostic pop #endif using onnxruntime::concurrency::ThreadPool; +#include namespace onnxruntime { namespace math { @@ -370,7 +371,27 @@ void Im2col::operator()( T* data_col, bool accumulate_output, T padding_value) { - int64_t kernel_size = std::accumulate(kernel_shape, kernel_shape + rank, 1LL, std::multiplies()); + + int64_t im_shape_size = std::accumulate(im_shape, im_shape + rank, 1LL, std::multiplies()); + int64_t output_shape_size = std::accumulate(output_shape, output_shape + rank, 1LL, std::multiplies()); + int64_t kernel_shape_size = std::accumulate(kernel_shape, kernel_shape + rank, 1LL, std::multiplies()); + + std::cout << "\n\nCalled void Im2col::operator()("; + std::cout << ",\n\tconst T* data_im={"; for (auto i=0; i < im_shape_size; ++i) std::cout << data_im[i] << ", "; std::cout << "}"; + std::cout << ",\n\tconst int64_t* im_shape={"; for (auto i=0; i < rank; ++i) std::cout << im_shape[i] << ", "; std::cout << "}"; + std::cout << ",\n\tconst int64_t* output_shape={"; for (auto i=0; i < rank; ++i) std::cout << output_shape[i] << ", "; std::cout << "}"; + std::cout << ",\n\tint64_t channels_col=" << channels_col; + std::cout << ",\n\tconst int64_t* kernel_shape={"; for (auto i=0; i < rank; ++i) std::cout << kernel_shape[i] << ", "; std::cout << "}"; + std::cout << ",\n\tconst int64_t* stride={"; for (auto i=0; i < rank; ++i) std::cout << stride[i] << ", "; std::cout << "}"; + std::cout << ",\n\tconst int64_t* dilation={"; for (auto i=0; i < rank; ++i) std::cout << dilation[i] << ", "; std::cout << "}"; + std::cout << ",\n\tconst int64_t* pad={"; for (auto i=0; i < rank; ++i) std::cout << pad[i] << ", "; std::cout << "}"; + std::cout << ",\n\tptrdiff_t rank=" << rank; + std::cout << ",\n\tT* data_col= preallocated pointer to write at {"; for (auto i=0; i < output_shape_size; ++i) std::cout << data_col[i] << ", "; std::cout << "}"; + std::cout << ",\n\tbool accumulate_output=" << accumulate_output; + std::cout << ",\n\tT padding_value=" << padding_value << ")"; + + std::cout << "\n\n\tVariable im_shape_size: " << im_shape_size << "\n\tVariable output_shape_size: "< d_offset(rank, 0); std::vector d_iter(rank, 0); for (int64_t c_col = 0; c_col < channels_col; ++c_col) { @@ -386,7 +407,7 @@ void Im2col::operator()( // Loop over spatial axes in forward order to compute the indices in the // image and column, and whether the index lies in the padding. 
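+      // c_col enumerates (image channel, kernel offset) pairs, so dividing by
+      // the flattened kernel size recovers the image channel that index_im
+      // starts from.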
int64_t index_col = c_col; - int64_t index_im = c_col / kernel_size; + int64_t index_im = c_col / kernel_shape_size; bool is_padding = false; for (ptrdiff_t d_i = 0; d_i < rank; ++d_i) { int64_t d = d_iter[d_i]; @@ -408,6 +429,8 @@ void Im2col::operator()( } } while (NextPosition(rank, output_shape, d_iter.data())); } // for (int c = 0; c < channels_col; ++c) { + + std::cout << "Return void Im2col -> T* data_col={"; for (auto i=0; i < output_shape_size; ++i) std::cout << data_col[i] << ", "; std::cout << "}\n"; } template struct Im2col; @@ -780,24 +803,48 @@ void Col2im(const float* data_col, int64 } template <> -void Col2imNd(const float* data_col, const int64_t* img_shape, - const int64_t* output_shape, int64_t channels_col, int64_t img_size, - const int64_t* kernel_shape, const int64_t* stride, - const int64_t* dilation, const int64_t* pad, ptrdiff_t N, - float* data_img, CPUMathUtil* context) { +void Col2imNd(const float* data_col, + const int64_t* img_shape, + const int64_t* output_shape, + int64_t channels_col, + int64_t img_size, + const int64_t* kernel_shape, + const int64_t* stride, + const int64_t* dilation, + const int64_t* pad, + ptrdiff_t N, + float* data_img, + CPUMathUtil* context) { + std::cout << "\n\nCalled void Col2imNd("; + std::cout << ",\n\tconst float* data_col={"; for (auto i=0; i < img_size; ++i) std::cout << data_col[i] << ", "; std::cout << "}"; + std::cout << ",\n\tconst int64_t* img_shape={"; for (auto i=0; i < N; ++i) std::cout << img_shape[i] << ", "; std::cout << "}"; + std::cout << ",\n\tconst int64_t* output_shape={"; for (auto i=0; i < N; ++i) std::cout << output_shape[i] << ", "; std::cout << "}"; + std::cout << ",\n\tint64_t channels_col=" << channels_col; + std::cout << ",\n\tint64_t img_size=" << img_size; + std::cout << ",\n\tconst int64_t* kernel_shape={"; for (auto i=0; i < N; ++i) std::cout << kernel_shape[i] << ", "; std::cout << "}"; + std::cout << ",\n\tconst int64_t* stride={"; for (auto i=0; i < N; ++i) std::cout << stride[i] << ", "; std::cout << "}"; + std::cout << ",\n\tconst int64_t* dilation={"; for (auto i=0; i < N; ++i) std::cout << dilation[i] << ", "; std::cout << "}"; + std::cout << ",\n\tconst int64_t* pad={"; for (auto i=0; i < 2*N; ++i) std::cout << pad[i] << ", "; std::cout << "}"; + std::cout << ",\n\tptrdiff_t N=" << N; + std::cout << ",\n\tfloat* data_img= preallocated pointer to save at {"; for (auto i=0; i < img_size; ++i) std::cout << data_img[i] << ", "; std::cout << "}"; + std::cout << ",\n\tCPUMathUtil* context=...)" << std::endl; + Set(gsl::narrow(img_size), 0, data_img, context); Im2col()( - data_col, - img_shape, - output_shape, - channels_col, - kernel_shape, - stride, - dilation, - pad, - N, - data_img, - true); + data_col, // const T* data_im, + img_shape, // const int64_t* im_shape, + output_shape, // const int64_t* output_shape, + channels_col, // int64_t channels_col, + kernel_shape, // const int64_t* kernel_shape, + stride, // const int64_t* stride, + dilation, // const int64_t* dilation, + pad, // const int64_t* pad, + N, // ptrdiff_t rank, + data_img, // T* data_col, + true // bool accumulate_output, + ); + + std::cout << "Return void Col2imNd --> float* data_img= {"; for (auto i=0; i < img_size; ++i) std::cout << data_img[i] << ", "; std::cout << "}"; } #define SPECIALIZED_COPYVECTOR(T) \ diff --git a/onnxruntime/test/contrib_ops/col2im_test.cc b/onnxruntime/test/contrib_ops/col2im_test.cc new file mode 100644 index 0000000000000..2a1b692673fc5 --- /dev/null +++ b/onnxruntime/test/contrib_ops/col2im_test.cc 
@@ -0,0 +1,28 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "gtest/gtest.h" +#include "test/providers/provider_test_utils.h" +#include "core/util/math.h" + +namespace onnxruntime { +namespace test { + +TEST(Col2ImContribOpTest, simple) { + OpTester test("Col2Im", 1, kMSDomain); + + test.AddAttribute("strides", std::vector{1, 1}); + test.AddAttribute("dilations", std::vector{1, 1}); + test.AddAttribute("pads", std::vector{0, 0, 0, 0}); + + test.AddInput("input", {1, 5, 5}, std::vector{1.f, 6.f, 11.f, 16.f, 21.f, 2.f, 7.f, 12.f, 17.f, 22.f, 3.f, 8.f, 13.f, 18.f, 23.f, 4.f, 9.f, 14.f, 19.f, 24.f, 5.f, 0.f, 15.f, 20.f, 25.f}); + test.AddInput("image_shape", {2}, std::vector{5, 5}); + test.AddInput("block_shape", {2}, std::vector{1, 5}); + + test.AddOutput("output", {1, 1, 5, 5}, std::vector{1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 13.f, 14.f, 15.f, 16.f, 17.f, 18.f, 19.f, 20.f, 21.f, 22.f, 23.f, 24.f, 25.f}); + test.Run(); +} + + +} // namespace test +} // namespace onnxruntime diff --git a/onnxruntime/test/python/testdata/node/test_col2im/test_data_set_0/input_0.pb b/onnxruntime/test/python/testdata/node/test_col2im/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..164166b2c84e8c0968a316c70ceb85e9b5fea07e GIT binary patch literal 117 zcmd;J@x2I3Qr0LGRS@&Et; literal 0 HcmV?d00001 diff --git a/onnxruntime/test/python/testdata/node/test_col2im/test_data_set_0/input_1.pb b/onnxruntime/test/python/testdata/node/test_col2im/test_data_set_0/input_1.pb new file mode 100644 index 0000000000000000000000000000000000000000..e2e47c174ce48b0b6cc775ccbad84426c3925a39 GIT binary patch literal 35 fcmd;J5@2`Y&dg0rPmM3mNGwS85@2P302mDbe=`L} literal 0 HcmV?d00001 diff --git a/onnxruntime/test/python/testdata/node/test_col2im/test_data_set_0/input_2.pb b/onnxruntime/test/python/testdata/node/test_col2im/test_data_set_0/input_2.pb new file mode 100644 index 0000000000000000000000000000000000000000..c0b7595628c4bb8bd1859c490f6242ca6bdbf7cc GIT binary patch literal 35 gcmd;J5@2`YPRhwo&WQoO#oXPz20+Gu0V(5@+l l6vss*_afp583h$D#EV$`>v)F=GXgR;>^M+x;=+xJ$A70x6@35z literal 0 HcmV?d00001 diff --git a/onnxruntime/test/python/testdata/node/test_col2im_5d/test_data_set_0/input_0.pb b/onnxruntime/test/python/testdata/node/test_col2im_5d/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..0b66e3fbccc21c2a88060142326527a6fd6ca537 GIT binary patch literal 498 zcmWm4F(?FJ0EXfJLR~W4U@#aA27|$1Fev9TNQN5>27|#sNjID_7%oYYBuST~OVTAt zk}gS?q)U<{U6L+Il60fdV|ePT_lriN_0fh{BvCgpm6}cuRBA%V^n_51htSb$ljoja zVu=u1>0*#kf^UVmzQ_?}g4ReI-6R~B!C){L32vFx)ItN!|DNQDqHveCFrHrryWZMNHC!cM#FHff*z z4m#wpBaS-exD!sAHsg%5W}S211s7d%*%foHy5_nYZo1{Rd3W4(&w~3Nc<7PGo_K1} nGta%SCh$jHC c1Caj!hz)@30wA6N#0@|`14s_U2I2*d02b*GVE_OC literal 0 HcmV?d00001 diff --git a/onnxruntime/test/python/testdata/node/test_col2im_dilations/test_data_set_0/input_1.pb b/onnxruntime/test/python/testdata/node/test_col2im_dilations/test_data_set_0/input_1.pb new file mode 100644 index 0000000000000000000000000000000000000000..ed056b38ede071201a58c4d489ee72565a9de9e6 GIT binary patch literal 35 fcmd;J5@2`Y&dg0rPmM3mNGwS85@2J102mDbe@g{O literal 0 HcmV?d00001 diff --git a/onnxruntime/test/python/testdata/node/test_col2im_dilations/test_data_set_0/input_2.pb b/onnxruntime/test/python/testdata/node/test_col2im_dilations/test_data_set_0/input_2.pb new file mode 100644 index 
0000000000000000000000000000000000000000..ea04f67ddf5b80dd13a9f42589cd7104b5e46f7a GIT binary patch literal 35 fcmd;J5@2`YPRhwo&Wm@T6DQ&$b=c0U)Xk;@xhu+-7W|J8_u|5#FQsW-_*9pD@(pf jbbB;7=A0W!?s;bJTeB^g^T`iex*CU^a6!Qx54_}m`4%=I literal 0 HcmV?d00001 diff --git a/onnxruntime/test/python/testdata/node/test_col2im_pads/test_data_set_0/input_1.pb b/onnxruntime/test/python/testdata/node/test_col2im_pads/test_data_set_0/input_1.pb new file mode 100644 index 0000000000000000000000000000000000000000..e2e47c174ce48b0b6cc775ccbad84426c3925a39 GIT binary patch literal 35 fcmd;J5@2`Y&dg0rPmM3mNGwS85@2P302mDbe=`L} literal 0 HcmV?d00001 diff --git a/onnxruntime/test/python/testdata/node/test_col2im_pads/test_data_set_0/input_2.pb b/onnxruntime/test/python/testdata/node/test_col2im_pads/test_data_set_0/input_2.pb new file mode 100644 index 0000000000000000000000000000000000000000..c0b7595628c4bb8bd1859c490f6242ca6bdbf7cc GIT binary patch literal 35 gcmd;J5@2`YPRhwo&W&l+oNV1vjM r;ppbc&GjHDmpl$!=n)svf(<)5thn*ug~y4clu~5)pJ;^!Eu!)RQ05ig literal 0 HcmV?d00001 diff --git a/onnxruntime/test/python/testdata/node/test_col2im_strides/test_data_set_0/input_0.pb b/onnxruntime/test/python/testdata/node/test_col2im_strides/test_data_set_0/input_0.pb new file mode 100644 index 0000000000000000000000000000000000000000..f33a7620e97e8b2934587759212fbf1350d5effd GIT binary patch literal 162 qcmd;J=_tH!59V*5I{E@RXG6prw@7n literal 0 HcmV?d00001 diff --git a/onnxruntime/test/python/testdata/node/test_col2im_strides/test_data_set_0/input_1.pb b/onnxruntime/test/python/testdata/node/test_col2im_strides/test_data_set_0/input_1.pb new file mode 100644 index 0000000000000000000000000000000000000000..e2e47c174ce48b0b6cc775ccbad84426c3925a39 GIT binary patch literal 35 fcmd;J5@2`Y&dg0rPmM3mNGwS85@2P302mDbe=`L} literal 0 HcmV?d00001 diff --git a/onnxruntime/test/python/testdata/node/test_col2im_strides/test_data_set_0/input_2.pb b/onnxruntime/test/python/testdata/node/test_col2im_strides/test_data_set_0/input_2.pb new file mode 100644 index 0000000000000000000000000000000000000000..19b497c93ccceed2813a63a90e568d62835d8ed1 GIT binary patch literal 35 fcmd;J5@2`YPRhwo&W Date: Wed, 3 Aug 2022 16:46:35 -0400 Subject: [PATCH 07/30] Added Tests for 4D and 5D images 4d col2im works, 5d and higher doesn't --- .../core/providers/cpu/tensor/col2im.cc | 50 ++++++++---- onnxruntime/core/util/math_cpu.cc | 2 +- onnxruntime/test/contrib_ops/col2im_test.cc | 76 ++++++++++++++++++- 3 files changed, 110 insertions(+), 18 deletions(-) diff --git a/onnxruntime/core/providers/cpu/tensor/col2im.cc b/onnxruntime/core/providers/cpu/tensor/col2im.cc index 4c3f999766f5e..f1daf277c2d0e 100644 --- a/onnxruntime/core/providers/cpu/tensor/col2im.cc +++ b/onnxruntime/core/providers/cpu/tensor/col2im.cc @@ -66,20 +66,42 @@ Status Col2Im::Compute(OpKernelContext* context) const { std::cout << "\n\tStatus Col2Im::Compute() --> math::Col2imNd<>()" << std::endl; - math::Col2imNd( - col_input_data, // const T* data_col, - image_shape->Data(), // const int64_t* img_shape, - Yshape.Slice(2).GetDims().data(), // const int64_t* output_shape, - col_input_C, // int64_t channels_col, --> output_num_channels * kernel_shape_size - image_shape_size, // int64_t img_size, - kernel_shape->Data(), // const int64_t* kernel_shape, - col2im_attrs_.strides.data(), // const int64_t* stride, - col2im_attrs_.dilations.data(), // const int64_t* dilation, - col2im_attrs_.pads.data(), // const int64_t* pad, - kernel_shape->Shape().Size(), // ptrdiff_t N, --> number of spatial dims for image - 
Ydata, // T* data_img, - &CPUMathUtil::Instance() // Provider* provider - ); + if (image_shape->Shape()[0] == 2) { + std::cout << "image_shape->Shape()[0] == 2 --> Col2Im" << std::endl; + math::Col2im( + col_input_data, + col_input_C, + image_shape->Data()[0], + image_shape->Data()[1], + kernel_shape->Data()[0], + kernel_shape->Data()[1], + col2im_attrs_.dilations[0], + col2im_attrs_.dilations[1], + col2im_attrs_.pads[0], + col2im_attrs_.pads[1], + col2im_attrs_.pads[2], + col2im_attrs_.pads[3], + col2im_attrs_.strides[0], + col2im_attrs_.strides[1], + Ydata, + &CPUMathUtil::Instance()); + } else { + std::cout << "image_shape->Shape()[0] != 2 --> Col2ImNd (nd=" << image_shape->Shape()[0] << ") " << std::endl; + math::Col2imNd( + col_input_data, // const T* data_col, + image_shape->Data(), // const int64_t* img_shape, + Yshape.Slice(2).GetDims().data(), // const int64_t* output_shape, + col_input_C, // int64_t channels_col, --> output_num_channels * kernel_shape_size + image_shape_size, // int64_t img_size, + kernel_shape->Data(), // const int64_t* kernel_shape, + col2im_attrs_.strides.data(), // const int64_t* stride, + col2im_attrs_.dilations.data(), // const int64_t* dilation, + col2im_attrs_.pads.data(), // const int64_t* pad, + kernel_shape->Shape().Size(), // ptrdiff_t N, --> number of spatial dims for image + Ydata, // T* data_img, + &CPUMathUtil::Instance() // Provider* provider + ); + } std::cout << "\n\n Return Col2Im::Compute() --> "; for (auto i=0; i < Yshape.Size(); ++i) std::cout << Ydata[i] << ", "; std::cout << ") with shape " << Yshape << std::endl << std::endl; return Status::OK(); diff --git a/onnxruntime/core/util/math_cpu.cc b/onnxruntime/core/util/math_cpu.cc index 05b265715e407..709941a819f79 100644 --- a/onnxruntime/core/util/math_cpu.cc +++ b/onnxruntime/core/util/math_cpu.cc @@ -384,7 +384,7 @@ void Im2col::operator()( std::cout << ",\n\tconst int64_t* kernel_shape={"; for (auto i=0; i < rank; ++i) std::cout << kernel_shape[i] << ", "; std::cout << "}"; std::cout << ",\n\tconst int64_t* stride={"; for (auto i=0; i < rank; ++i) std::cout << stride[i] << ", "; std::cout << "}"; std::cout << ",\n\tconst int64_t* dilation={"; for (auto i=0; i < rank; ++i) std::cout << dilation[i] << ", "; std::cout << "}"; - std::cout << ",\n\tconst int64_t* pad={"; for (auto i=0; i < rank; ++i) std::cout << pad[i] << ", "; std::cout << "}"; + std::cout << ",\n\tconst int64_t* pad={"; for (auto i=0; i < 2*rank; ++i) std::cout << pad[i] << ", "; std::cout << "}"; std::cout << ",\n\tptrdiff_t rank=" << rank; std::cout << ",\n\tT* data_col= preallocated pointer to write at {"; for (auto i=0; i < output_shape_size; ++i) std::cout << data_col[i] << ", "; std::cout << "}"; std::cout << ",\n\tbool accumulate_output=" << accumulate_output; diff --git a/onnxruntime/test/contrib_ops/col2im_test.cc b/onnxruntime/test/contrib_ops/col2im_test.cc index 2a1b692673fc5..f98a2d754d049 100644 --- a/onnxruntime/test/contrib_ops/col2im_test.cc +++ b/onnxruntime/test/contrib_ops/col2im_test.cc @@ -1,6 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
+#include #include "gtest/gtest.h" #include "test/providers/provider_test_utils.h" #include "core/util/math.h" @@ -8,18 +9,87 @@ namespace onnxruntime { namespace test { -TEST(Col2ImContribOpTest, simple) { +template +std::vector _transpose_1dvector(std::vector &input, size_t C, size_t H, size_t W) +{ + size_t n = input.size(); + if (n == 0){ + throw std::runtime_error("Invalid input"); + } + std::vector trans_vec(input); + + std::cout << "input: ("; + for(size_t i = 0; i < n; ++i) + std::cout << trans_vec[i] << ", "; + std::cout << ")" << std::endl; + + for(size_t c = 0; c < C; ++c) + for(size_t i = 0; i < H; ++i) + for(size_t j = i+1; j < W; ++j) + std::swap(trans_vec[c*(H*W) + (H*i + j)], trans_vec[c*(H*W) + (W*j + i)]); + + std::cout << "trans_vec: ("; + for(size_t i = 0; i < n; ++i) + std::cout << trans_vec[i] << ", "; + std::cout << ")" << std::endl; + + return trans_vec; +} + +TEST(Col2ImContribOpTest, simple4dNCHW) { OpTester test("Col2Im", 1, kMSDomain); test.AddAttribute("strides", std::vector{1, 1}); test.AddAttribute("dilations", std::vector{1, 1}); test.AddAttribute("pads", std::vector{0, 0, 0, 0}); - test.AddInput("input", {1, 5, 5}, std::vector{1.f, 6.f, 11.f, 16.f, 21.f, 2.f, 7.f, 12.f, 17.f, 22.f, 3.f, 8.f, 13.f, 18.f, 23.f, 4.f, 9.f, 14.f, 19.f, 24.f, 5.f, 0.f, 15.f, 20.f, 25.f}); + std::vector input(25); + std::vector output(25); + std::iota(output.begin(), output.end(), 1); + input = _transpose_1dvector(output, 1, 5, 5); + test.AddInput("input", {1, 5, 5}, input); test.AddInput("image_shape", {2}, std::vector{5, 5}); test.AddInput("block_shape", {2}, std::vector{1, 5}); - test.AddOutput("output", {1, 1, 5, 5}, std::vector{1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 13.f, 14.f, 15.f, 16.f, 17.f, 18.f, 19.f, 20.f, 21.f, 22.f, 23.f, 24.f, 25.f}); + test.AddOutput("output", {1, 1, 5, 5}, output); + test.Run(); +} + +TEST(Col2ImContribOpTest, with3channels4dNCHW) { + OpTester test("Col2Im", 1, kMSDomain); + + test.AddAttribute("strides", std::vector{1, 1}); + test.AddAttribute("dilations", std::vector{1, 1}); + test.AddAttribute("pads", std::vector{0, 0, 0, 0}); + + std::vector input(75); + std::vector output(75); + std::iota(output.begin(), output.end(), 1); + input = _transpose_1dvector(output, 3, 5, 5); + test.AddInput("input", {1, 15, 5}, input); + test.AddInput("image_shape", {2}, std::vector{5, 5}); + test.AddInput("block_shape", {2}, std::vector{1, 5}); + + test.AddOutput("output", {1, 3, 5, 5}, output); + test.Run(); +} + +TEST(Col2ImContribOpTest, simple5dNCHWD) { + OpTester test("Col2Im", 1, kMSDomain); + + test.AddAttribute("strides", std::vector{1, 1, 1}); + test.AddAttribute("dilations", std::vector{1, 1, 1}); + test.AddAttribute("pads", std::vector{0, 0, 0, 0, 0, 0}); + + std::vector input(25); + std::vector output(25); + std::iota(output.begin(), output.end(), 1); + input = _transpose_1dvector(output, 1, 5, 5); + test.AddInput("input", {1, 5, 5}, input); + test.AddInput("image_shape", {3}, std::vector{1, 5, 5}); + test.AddInput("block_shape", {3}, std::vector{1, 1, 5}); + + test.AddOutput("output", {1, 1, 1, 5, 5}, output); test.Run(); } From d00f2c5c0af888b407251d4cb81f7b759a9e29e1 Mon Sep 17 00:00:00 2001 From: Thiago Crepaldi Date: Thu, 4 Aug 2022 17:36:52 -0400 Subject: [PATCH 08/30] Add support to N>1 --- .../core/providers/cpu/tensor/col2im.cc | 84 +++++++++++-------- onnxruntime/test/contrib_ops/col2im_test.cc | 44 +++++++--- 2 files changed, 80 insertions(+), 48 deletions(-) diff --git 
a/onnxruntime/core/providers/cpu/tensor/col2im.cc b/onnxruntime/core/providers/cpu/tensor/col2im.cc index f1daf277c2d0e..fc514c453ae43 100644 --- a/onnxruntime/core/providers/cpu/tensor/col2im.cc +++ b/onnxruntime/core/providers/cpu/tensor/col2im.cc @@ -1,6 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +#include #include "core/providers/cpu/tensor/col2im.h" #include "core/framework/element_type_lists.h" @@ -36,12 +37,15 @@ Status Col2Im::Compute(OpKernelContext* context) const { int64_t image_shape_size = 1; int64_t kernel_shape_size = 1; + int64_t kernel_shape_rank = 0; for (auto i=0; i < image_shape->Shape().Size(); ++i) { + ++kernel_shape_rank; image_shape_size *= image_shape->Data()[i]; kernel_shape_size *= kernel_shape->Data()[i]; // col_input_C computed as => (C*n-ary-prod{kernel_shape}) / n-ary-prod{kernel_shape} col_input_C /= kernel_shape->Data()[i]; } + const int64_t col_input_offset = col_input_C * image_shape_size; TensorShapeVector Y_dims; Y_dims.insert(Y_dims.begin(), {col_input_N, col_input_C}); @@ -51,8 +55,11 @@ Status Col2Im::Compute(OpKernelContext* context) const { TensorShape Yshape(Y_dims); Tensor* Y = context->Output(0, Yshape); T* Ydata = Y->template MutableData(); + for (auto i=0; i < Yshape.Size(); ++i) + Ydata[i] = -1; // just for debug (to know what has been written to Ydata in the end) + const int64_t Y_offset = Yshape.Size() / Yshape[0]; - std::cout << "\n\tInput 0: col_input = ("; for (auto i=0; i < Yshape.Size(); ++i) std::cout << col_input_data[i] << ", "; std::cout << ") with shape "<< Yshape << std::endl; + std::cout << "\n\tInput 0: col_input = ("; for (auto i=0; i < Yshape.Size(); ++i) std::cout << col_input_data[i] << ", "; std::cout << ") with shape "<< col_input_shape << std::endl; std::cout << "\tInput 1: image_shape = ("; for (auto i=0; i < image_shape->Shape().Size(); ++i) std::cout << image_shape->Data()[i] << ", "; std::cout << ")" << std::endl; std::cout << "\tInput 2: kernel_shape = ("; for (auto i=0; i < kernel_shape->Shape().Size(); ++i) std::cout << kernel_shape->Data()[i] << ", "; std::cout << ")" << std::endl; std::cout << "\tAttribute strides = ("; for (size_t i=0; i < col2im_attrs_.strides.size(); ++i) std::cout << col2im_attrs_.strides[i] << ", "; std::cout << ")"<< std::endl; @@ -66,41 +73,46 @@ Status Col2Im::Compute(OpKernelContext* context) const { std::cout << "\n\tStatus Col2Im::Compute() --> math::Col2imNd<>()" << std::endl; - if (image_shape->Shape()[0] == 2) { - std::cout << "image_shape->Shape()[0] == 2 --> Col2Im" << std::endl; - math::Col2im( - col_input_data, - col_input_C, - image_shape->Data()[0], - image_shape->Data()[1], - kernel_shape->Data()[0], - kernel_shape->Data()[1], - col2im_attrs_.dilations[0], - col2im_attrs_.dilations[1], - col2im_attrs_.pads[0], - col2im_attrs_.pads[1], - col2im_attrs_.pads[2], - col2im_attrs_.pads[3], - col2im_attrs_.strides[0], - col2im_attrs_.strides[1], - Ydata, - &CPUMathUtil::Instance()); - } else { - std::cout << "image_shape->Shape()[0] != 2 --> Col2ImNd (nd=" << image_shape->Shape()[0] << ") " << std::endl; - math::Col2imNd( - col_input_data, // const T* data_col, - image_shape->Data(), // const int64_t* img_shape, - Yshape.Slice(2).GetDims().data(), // const int64_t* output_shape, - col_input_C, // int64_t channels_col, --> output_num_channels * kernel_shape_size - image_shape_size, // int64_t img_size, - kernel_shape->Data(), // const int64_t* kernel_shape, - col2im_attrs_.strides.data(), // const int64_t* stride, - 
col2im_attrs_.dilations.data(), // const int64_t* dilation, - col2im_attrs_.pads.data(), // const int64_t* pad, - kernel_shape->Shape().Size(), // ptrdiff_t N, --> number of spatial dims for image - Ydata, // T* data_img, - &CPUMathUtil::Instance() // Provider* provider - ); + assert(image_shape_size == Y_offset); // just for temp debug + + for (auto image_id = 0; image_id < col_input_N; ++image_id) { + std::cout << "Image " << image_id+1 << " out of "<< col_input_N << std::endl; + if (image_shape->Shape()[0] == 2) { + std::cout << "image_shape->Shape()[0] == 2 --> Col2Im" << std::endl; + math::Col2im( + col_input_data + image_id * col_input_offset, + col_input_C, + image_shape->Data()[0], + image_shape->Data()[1], + kernel_shape->Data()[0], + kernel_shape->Data()[1], + col2im_attrs_.dilations[0], + col2im_attrs_.dilations[1], + col2im_attrs_.pads[0], + col2im_attrs_.pads[1], + col2im_attrs_.pads[2], + col2im_attrs_.pads[3], + col2im_attrs_.strides[0], + col2im_attrs_.strides[1], + Ydata + image_id * Y_offset, + &CPUMathUtil::Instance()); + } else { + std::cout << "image_shape->Shape()[0] != 2 --> Col2ImNd (nd=" << image_shape->Shape()[0] << ") " << std::endl; + math::Col2imNd( + col_input_data + image_id * col_input_offset, // const T* data_col, + image_shape->Data(), // const int64_t* img_shape, + Yshape.Slice(2).GetDims().data(), // const int64_t* output_shape, + col_input_C, // int64_t channels_col, + image_shape_size, // int64_t img_size, + kernel_shape->Data(), // const int64_t* kernel_shape, + col2im_attrs_.strides.data(), // const int64_t* stride, + col2im_attrs_.dilations.data(), // const int64_t* dilation, + col2im_attrs_.pads.data(), // const int64_t* pad, + kernel_shape->Shape().Size(), // ptrdiff_t N, --> #spatial_dims? + Ydata + image_id * Y_offset, // T* data_img, + &CPUMathUtil::Instance() // Provider* provider + ); + } } std::cout << "\n\n Return Col2Im::Compute() --> "; for (auto i=0; i < Yshape.Size(); ++i) std::cout << Ydata[i] << ", "; std::cout << ") with shape " << Yshape << std::endl << std::endl; diff --git a/onnxruntime/test/contrib_ops/col2im_test.cc b/onnxruntime/test/contrib_ops/col2im_test.cc index f98a2d754d049..6dacfa9f02301 100644 --- a/onnxruntime/test/contrib_ops/col2im_test.cc +++ b/onnxruntime/test/contrib_ops/col2im_test.cc @@ -10,26 +10,27 @@ namespace onnxruntime { namespace test { template -std::vector _transpose_1dvector(std::vector &input, size_t C, size_t H, size_t W) +std::vector _transpose_serialized_vector(std::vector &input, size_t N, size_t C, size_t H, size_t W) { - size_t n = input.size(); - if (n == 0){ + size_t input_size = input.size(); + if (input_size == 0){ throw std::runtime_error("Invalid input"); } std::vector trans_vec(input); std::cout << "input: ("; - for(size_t i = 0; i < n; ++i) + for(size_t i = 0; i < input_size; ++i) std::cout << trans_vec[i] << ", "; std::cout << ")" << std::endl; - for(size_t c = 0; c < C; ++c) - for(size_t i = 0; i < H; ++i) - for(size_t j = i+1; j < W; ++j) - std::swap(trans_vec[c*(H*W) + (H*i + j)], trans_vec[c*(H*W) + (W*j + i)]); + for(size_t n = 0; n < N; ++n) + for(size_t c = 0; c < C; ++c) + for(size_t i = 0; i < H; ++i) + for(size_t j = i+1; j < W; ++j) + std::swap(trans_vec[n*(C*H*W) + c*(H*W) + (H*i + j)], trans_vec[n*(C*H*W) + c*(H*W) + (W*j + i)]); std::cout << "trans_vec: ("; - for(size_t i = 0; i < n; ++i) + for(size_t i = 0; i < input_size; ++i) std::cout << trans_vec[i] << ", "; std::cout << ")" << std::endl; @@ -46,7 +47,7 @@ TEST(Col2ImContribOpTest, simple4dNCHW) { std::vector 
input(25); std::vector output(25); std::iota(output.begin(), output.end(), 1); - input = _transpose_1dvector(output, 1, 5, 5); + input = _transpose_serialized_vector(output, 1, 1, 5, 5); test.AddInput("input", {1, 5, 5}, input); test.AddInput("image_shape", {2}, std::vector{5, 5}); test.AddInput("block_shape", {2}, std::vector{1, 5}); @@ -65,7 +66,7 @@ TEST(Col2ImContribOpTest, with3channels4dNCHW) { std::vector input(75); std::vector output(75); std::iota(output.begin(), output.end(), 1); - input = _transpose_1dvector(output, 3, 5, 5); + input = _transpose_serialized_vector(output, 1, 3, 5, 5); test.AddInput("input", {1, 15, 5}, input); test.AddInput("image_shape", {2}, std::vector{5, 5}); test.AddInput("block_shape", {2}, std::vector{1, 5}); @@ -74,6 +75,25 @@ TEST(Col2ImContribOpTest, with3channels4dNCHW) { test.Run(); } +TEST(Col2ImContribOpTest, with2Images3channels4dNCHW) { + OpTester test("Col2Im", 1, kMSDomain); + + test.AddAttribute("strides", std::vector{1, 1}); + test.AddAttribute("dilations", std::vector{1, 1}); + test.AddAttribute("pads", std::vector{0, 0, 0, 0}); + + std::vector input(150); + std::vector output(150); + std::iota(output.begin(), output.end(), 1); + input = _transpose_serialized_vector(output, 2, 3, 5, 5); + test.AddInput("input", {2, 15, 5}, input); + test.AddInput("image_shape", {2}, std::vector{5, 5}); + test.AddInput("block_shape", {2}, std::vector{1, 5}); + + test.AddOutput("output", {2, 3, 5, 5}, output); + test.Run(); +} + TEST(Col2ImContribOpTest, simple5dNCHWD) { OpTester test("Col2Im", 1, kMSDomain); @@ -84,7 +104,7 @@ TEST(Col2ImContribOpTest, simple5dNCHWD) { std::vector input(25); std::vector output(25); std::iota(output.begin(), output.end(), 1); - input = _transpose_1dvector(output, 1, 5, 5); + input = _transpose_serialized_vector(output, 1, 1, 5, 5); test.AddInput("input", {1, 5, 5}, input); test.AddInput("image_shape", {3}, std::vector{1, 5, 5}); test.AddInput("block_shape", {3}, std::vector{1, 1, 5}); From 5b27cb1cfb679c3c2b805ad4c923a62c10f718bb Mon Sep 17 00:00:00 2001 From: Thiago Crepaldi Date: Tue, 9 Aug 2022 18:35:48 -0400 Subject: [PATCH 09/30] Add logs and Rama's suggestions --- .../core/graph/contrib_ops/contrib_defs.cc | 9 +- .../core/providers/cpu/tensor/col2im.cc | 71 ++++++---- onnxruntime/core/util/math_cpu.cc | 127 +++++++++++++----- onnxruntime/test/contrib_ops/col2im_test.cc | 69 ++++++++-- .../python/contrib_ops/onnx_test_col2im.py | 4 +- 5 files changed, 204 insertions(+), 76 deletions(-) diff --git a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc index 8fae65f390e4b..cc992a7dfbdbe 100644 --- a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc +++ b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc @@ -1031,7 +1031,7 @@ ONNX_MS_OPERATOR_SET_SCHEMA(Col2Im, 1, "The value represent the number of pixels added to the beginning " "and end part of the corresponding axis. `pads` format should be as follow " "[x1_begin, x2_begin...x1_end, x2_end,...], where xi_begin is the number of pixels " - "added at the beginning of axis `i` and xi_end is the number of pixels added at the end of axis `i`. " + "added at the beginning of axis `i` and xi_end the same for the end of axis `i`. " "If not present, the padding defaults to 0 along start and end of each spatial axis.", AttributeProto::INTS, OPTIONAL_VALUE) @@ -1056,7 +1056,7 @@ ONNX_MS_OPERATOR_SET_SCHEMA(Col2Im, 1, 1, "image_shape", "The shape of the spatial dimensions of the image after rearranging the column blocks." 
- "This is a 1-dimensional tensor with size of at least 2, containing the value [H_img, W_img] " + "This is a 1-dim tensor with size of at least 2, containing the value [H_img, W_img] " " for a 2-D image or [dim_i1, dim_i2, ..., dim_iN] for a N-D image.", "tensor(int64)", OpSchema::Single, @@ -1067,8 +1067,9 @@ ONNX_MS_OPERATOR_SET_SCHEMA(Col2Im, 1, 2, "block_shape", "The shape of the block to apply on the input." - "This is a 1-dimensional tensor of size of at least 2, containing the value [H_block, W_block] " - " for a 2-D image or [dim_b1, dim_b2, ..., dim_bN] for a N-D block.", + "This is a 1-dim tensor of size of at least 2, containing the value [H_block, W_block] " + " for a 2-D image or [dim_b1, dim_b2, ..., dim_bN] for a N-D block." + "Dilations, pads and strides are applied to block_shape under the hood.", "tensor(int64)", OpSchema::Single, true, diff --git a/onnxruntime/core/providers/cpu/tensor/col2im.cc b/onnxruntime/core/providers/cpu/tensor/col2im.cc index fc514c453ae43..51af71364ca15 100644 --- a/onnxruntime/core/providers/cpu/tensor/col2im.cc +++ b/onnxruntime/core/providers/cpu/tensor/col2im.cc @@ -32,7 +32,6 @@ Status Col2Im::Compute(OpKernelContext* context) const { const T* col_input_data = col_input->template Data(); TensorShape col_input_shape = col_input->Shape(); - int64_t col_input_C = col_input_shape[1]; const auto col_input_N = col_input_shape[0]; int64_t image_shape_size = 1; @@ -42,13 +41,13 @@ Status Col2Im::Compute(OpKernelContext* context) const { ++kernel_shape_rank; image_shape_size *= image_shape->Data()[i]; kernel_shape_size *= kernel_shape->Data()[i]; - // col_input_C computed as => (C*n-ary-prod{kernel_shape}) / n-ary-prod{kernel_shape} - col_input_C /= kernel_shape->Data()[i]; } - const int64_t col_input_offset = col_input_C * image_shape_size; + const int64_t C = col_input_shape[1] / kernel_shape_size; + const int64_t col_output_stride = col_input_shape.SizeFromDimension(1); + const int64_t col_input_stride = C * image_shape_size; TensorShapeVector Y_dims; - Y_dims.insert(Y_dims.begin(), {col_input_N, col_input_C}); + Y_dims.insert(Y_dims.begin(), {col_input_N, C}); for (auto i=0; i < image_shape->Shape()[0]; ++i) { Y_dims.push_back(image_shape->Data()[i]); } @@ -56,32 +55,38 @@ Status Col2Im::Compute(OpKernelContext* context) const { Tensor* Y = context->Output(0, Yshape); T* Ydata = Y->template MutableData(); for (auto i=0; i < Yshape.Size(); ++i) - Ydata[i] = -1; // just for debug (to know what has been written to Ydata in the end) - const int64_t Y_offset = Yshape.Size() / Yshape[0]; - - std::cout << "\n\tInput 0: col_input = ("; for (auto i=0; i < Yshape.Size(); ++i) std::cout << col_input_data[i] << ", "; std::cout << ") with shape "<< col_input_shape << std::endl; - std::cout << "\tInput 1: image_shape = ("; for (auto i=0; i < image_shape->Shape().Size(); ++i) std::cout << image_shape->Data()[i] << ", "; std::cout << ")" << std::endl; - std::cout << "\tInput 2: kernel_shape = ("; for (auto i=0; i < kernel_shape->Shape().Size(); ++i) std::cout << kernel_shape->Data()[i] << ", "; std::cout << ")" << std::endl; - std::cout << "\tAttribute strides = ("; for (size_t i=0; i < col2im_attrs_.strides.size(); ++i) std::cout << col2im_attrs_.strides[i] << ", "; std::cout << ")"<< std::endl; - std::cout << "\tAttribute dilations = ("; for (size_t i=0; i < col2im_attrs_.dilations.size(); ++i) std::cout << col2im_attrs_.dilations[i] << ", "; std::cout << ")"<< std::endl; - std::cout << "\tAttribute pads = ("; for (size_t i=0; i < col2im_attrs_.pads.size(); 
++i) std::cout << col2im_attrs_.pads[i] << ", "; std::cout << ")"<< std::endl; - - std::cout << "\tVariable col_input_C: " << col_input_C << std::endl; + Ydata[i] = -1; // just for debug (to know what has been written to Ydata in the end) + // const int64_t Y_offset = Yshape.Size() / Yshape[0]; + + std::cout << "\n\tInput 0: col_input = ("; for (auto i=0; i < Yshape.Size(); ++i) std::cout << + col_input_data[i] << ", "; std::cout << ") with shape "<< col_input_shape << std::endl; + std::cout << "\tInput 1: image_shape = ("; for (auto i=0; i < image_shape->Shape().Size(); ++i) std::cout << + image_shape->Data()[i] << ", "; std::cout << ")" << std::endl; + std::cout << "\tInput 2: kernel_shape = ("; for (auto i=0; i < kernel_shape->Shape().Size(); ++i) std::cout << + kernel_shape->Data()[i] << ", "; std::cout << ")" << std::endl; + std::cout << "\tAttribute strides = ("; for (size_t i=0; i < col2im_attrs_.strides.size(); ++i) std::cout << + col2im_attrs_.strides[i] << ", "; std::cout << ")"<< std::endl; + std::cout << "\tAttribute dilations = ("; for (size_t i=0; i < col2im_attrs_.dilations.size(); ++i) std::cout << + col2im_attrs_.dilations[i] << ", "; std::cout << ")"<< std::endl; + std::cout << "\tAttribute pads = ("; for (size_t i=0; i < col2im_attrs_.pads.size(); ++i) std::cout << + col2im_attrs_.pads[i] << ", "; std::cout << ")"<< std::endl; + + std::cout << "\tVariable C: " << C << std::endl; std::cout << "\tVariable col_input_N = " << col_input_N << std::endl; std::cout << "\tVariable image_shape_size: " << image_shape_size << std::endl; std::cout << "\tVariable kernel_shape_size: " << kernel_shape_size << std::endl; std::cout << "\n\tStatus Col2Im::Compute() --> math::Col2imNd<>()" << std::endl; - assert(image_shape_size == Y_offset); // just for temp debug + assert(image_shape_size == Y_offset); // just for temp debug for (auto image_id = 0; image_id < col_input_N; ++image_id) { std::cout << "Image " << image_id+1 << " out of "<< col_input_N << std::endl; if (image_shape->Shape()[0] == 2) { std::cout << "image_shape->Shape()[0] == 2 --> Col2Im" << std::endl; math::Col2im( - col_input_data + image_id * col_input_offset, - col_input_C, + col_input_data + image_id * col_output_stride, + C, image_shape->Data()[0], image_shape->Data()[1], kernel_shape->Data()[0], @@ -94,27 +99,41 @@ Status Col2Im::Compute(OpKernelContext* context) const { col2im_attrs_.pads[3], col2im_attrs_.strides[0], col2im_attrs_.strides[1], - Ydata + image_id * Y_offset, + Ydata + image_id * col_input_stride, &CPUMathUtil::Instance()); } else { std::cout << "image_shape->Shape()[0] != 2 --> Col2ImNd (nd=" << image_shape->Shape()[0] << ") " << std::endl; math::Col2imNd( - col_input_data + image_id * col_input_offset, // const T* data_col, + col_input_data + image_id * col_output_stride, // const T* data_col, image_shape->Data(), // const int64_t* img_shape, Yshape.Slice(2).GetDims().data(), // const int64_t* output_shape, - col_input_C, // int64_t channels_col, + // col_input_shape[1], // int64_t channels_col, + // leads to output + // {1, -nan, 11, 16, 2.58141e+34, 2, 8.80295e+34, 12, 17, 22, 3, 4.59718e+24, 13, 18, 2.85144e+34, 4, + // -443.863, 14, -nan, 24, 5, 10, 15, 20, 25} + // that is similar to input with some spots with random values + + C, // int64_t channels_col, + // leads to output {1, 6, 11, 16, 21, 2, 7, 12, 17, 22, 3, 8, 13, 18, 23, 4, 9, 14, 19, 24, 5, 10, 15, 20, 25, } + // that is identical to input + + // col_input_shape[2], // int64_t channels_col, + // leads to output + // {1, 6, 1.92869e+31, 
4.84145e+30, 1.88774e+31, 2, 7, 12, 17, 22, 3, 8, 1.86549e+31, 3.40686e+25, + // 2.20182e+24, 4, -2.56655e+29, 5.08551e+31, -1.05888e+29, 1.51107e+29, 5, 10, 15, 20, 7.2793e+31} + // that is very similar to input, but with some rounded numbers and corrupted "25" value image_shape_size, // int64_t img_size, kernel_shape->Data(), // const int64_t* kernel_shape, col2im_attrs_.strides.data(), // const int64_t* stride, col2im_attrs_.dilations.data(), // const int64_t* dilation, col2im_attrs_.pads.data(), // const int64_t* pad, kernel_shape->Shape().Size(), // ptrdiff_t N, --> #spatial_dims? - Ydata + image_id * Y_offset, // T* data_img, - &CPUMathUtil::Instance() // Provider* provider - ); + Ydata + image_id * col_input_stride, // T* data_img, + &CPUMathUtil::Instance()); // Provider* provider } } - std::cout << "\n\n Return Col2Im::Compute() --> "; for (auto i=0; i < Yshape.Size(); ++i) std::cout << Ydata[i] << ", "; std::cout << ") with shape " << Yshape << std::endl << std::endl; + std::cout << "\n\n Return Col2Im::Compute() --> "; for (auto i=0; i < Yshape.Size(); ++i) std::cout << + Ydata[i] << ", "; std::cout << ") with shape " << Yshape << std::endl << std::endl; return Status::OK(); } diff --git a/onnxruntime/core/util/math_cpu.cc b/onnxruntime/core/util/math_cpu.cc index 709941a819f79..3b5fa0a549af9 100644 --- a/onnxruntime/core/util/math_cpu.cc +++ b/onnxruntime/core/util/math_cpu.cc @@ -36,6 +36,8 @@ using onnxruntime::concurrency::ThreadPool; namespace onnxruntime { namespace math { +using std::cout; + // MatMul implementation purely based on Eigen. #define EIGEN_MATMUL_FUNCTION(T) \ template <> \ @@ -308,23 +310,56 @@ void Im2col::operator()( int64_t stride_w, T* data_col, T padding_value) { + + cout << "void Im2col::operator()("; + cout << "\n\tconst T* data_im=" << data_im; + cout << "\n\tint64_t channels=" << channels; + cout << "\n\tint64_t heigh=" << height; + cout << "\n\tint64_t width=" << width; + cout << "\n\tint64_t kernel_h=" << kernel_h; + cout << "\n\tint64_t kernel_w=" << kernel_w; + cout << "\n\tint64_t dilation_h=" << dilation_h; + cout << "\n\tint64_t dilation_w=" << dilation_w; + cout << "\n\tint64_t pad_t=" << pad_t; + cout << "\n\tint64_t pad_l=" << pad_l; + cout << "\n\tint64_t pad_b=" << pad_b; + cout << "\n\tint64_t pad_r=" << pad_r; + cout << "\n\tint64_t stride_h=" << stride_h; + cout << "\n\tint64_t stride_w=" << stride_w; + cout << "\n\tT* data_col=" << data_col; + cout << "\n\tT padding_value=" << padding_value << ")" << std::endl; + + int data_col_offset = 0; const int64_t output_h = (height + pad_b + pad_t - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; const int64_t output_w = (width + pad_l + pad_r - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; - + cout << "output_h: " << output_h << std::endl; + cout << "output_w: " << output_w << std::endl; // From Intel, https://github.com/BVLC/caffe/pull/3536 int64_t channel_size = height * width; + cout << "channel_size (height * width): " << channel_size << std::endl; for (int64_t channel = channels; channel--; data_im += channel_size) { + cout << "for channel= " << channel << "/channel_size=" << channel_size << std::endl; for (int64_t kernel_row = 0; kernel_row < kernel_h; kernel_row++) { + cout << "\tfor kernel_row= " << kernel_row << "/kernel_h=" << kernel_h << std::endl; for (int64_t kernel_col = 0; kernel_col < kernel_w; kernel_col++) { + cout << "\t\tfor kernel_col= " << kernel_col << "/kernel_w=" << kernel_w << std::endl; int64_t input_row = -pad_t + kernel_row * dilation_h; + cout << 
"\t\t\tinput_row= " << input_row << std::endl; for (int64_t output_rows = output_h; output_rows; output_rows--) { + cout << "\t\t\tfor output_rows= " << output_rows << "/output_h=" << output_h << std::endl; if (!is_a_ge_zero_and_a_lt_b(input_row, height)) { std::fill_n(data_col, output_w, padding_value); + cout << "\t\t\t\t(input_row out of bounds) data_col["<< data_col_offset << "] = " << padding_value << + " * " << output_w << " times"<< std::endl; + data_col_offset += output_w; data_col += output_w; } else { int64_t input_col = -pad_l + kernel_col * dilation_w; + cout << "\t\t\t\tinput_col= " << input_col << std::endl; + cout << "\t\t\t\tinput_pos= " << input_row * width + input_col << std::endl; const T* rdptr = data_im + input_row * width + input_col; for (int64_t i = 0; i < output_w;) { + cout << "\t\t\t\tfor i= " << i << "/output_w=" << output_w << std::endl; int64_t output_handled = 1; if (is_a_ge_zero_and_a_lt_b(input_col, width)) { if (stride_w == 1) { @@ -332,6 +367,10 @@ void Im2col::operator()( // and the number of output elements to produce. output_handled = std::min(width - input_col, output_w - i); data_col = std::copy_n(&rdptr[i], static_cast(output_handled), data_col); + cout << "\t\t\t\t\tdata_col["<< data_col_offset << "] = " << rdptr[i] << std::endl; + data_col_offset += output_handled; + // cout << "\t\t\t\t\t(stride 1) Copied " << output_handled << + // " element(s) from data_im to data_col: " << rdptr[i] << std::endl; } else if (stride_w == 2) { // Same as above except using the number of strided input elements. output_handled = std::min((width - input_col + 1) / 2, output_w - i); @@ -340,11 +379,16 @@ void Im2col::operator()( *(data_col++) = *local_rdptr; local_rdptr += 2; } + cout << "\t\t\t\t\t(stride 2) Copy " << output_handled + << " elements from data_im to data_col " << std::endl; } else { *(data_col++) = rdptr[i * stride_w]; + cout << "\t\t\t\t\t(stride >2) Copy 1 element from data_im to data_col " << std::endl; } } else { *(data_col++) = padding_value; + cout << "\t\t\t\t\t(input_col out of bounds) fill data_col with 1 padding_value= " << + padding_value << std::endl; } input_col += output_handled * stride_w; i += output_handled; @@ -376,21 +420,30 @@ void Im2col::operator()( int64_t output_shape_size = std::accumulate(output_shape, output_shape + rank, 1LL, std::multiplies()); int64_t kernel_shape_size = std::accumulate(kernel_shape, kernel_shape + rank, 1LL, std::multiplies()); - std::cout << "\n\nCalled void Im2col::operator()("; - std::cout << ",\n\tconst T* data_im={"; for (auto i=0; i < im_shape_size; ++i) std::cout << data_im[i] << ", "; std::cout << "}"; - std::cout << ",\n\tconst int64_t* im_shape={"; for (auto i=0; i < rank; ++i) std::cout << im_shape[i] << ", "; std::cout << "}"; - std::cout << ",\n\tconst int64_t* output_shape={"; for (auto i=0; i < rank; ++i) std::cout << output_shape[i] << ", "; std::cout << "}"; - std::cout << ",\n\tint64_t channels_col=" << channels_col; - std::cout << ",\n\tconst int64_t* kernel_shape={"; for (auto i=0; i < rank; ++i) std::cout << kernel_shape[i] << ", "; std::cout << "}"; - std::cout << ",\n\tconst int64_t* stride={"; for (auto i=0; i < rank; ++i) std::cout << stride[i] << ", "; std::cout << "}"; - std::cout << ",\n\tconst int64_t* dilation={"; for (auto i=0; i < rank; ++i) std::cout << dilation[i] << ", "; std::cout << "}"; - std::cout << ",\n\tconst int64_t* pad={"; for (auto i=0; i < 2*rank; ++i) std::cout << pad[i] << ", "; std::cout << "}"; - std::cout << ",\n\tptrdiff_t rank=" << rank; - std::cout << 
",\n\tT* data_col= preallocated pointer to write at {"; for (auto i=0; i < output_shape_size; ++i) std::cout << data_col[i] << ", "; std::cout << "}"; - std::cout << ",\n\tbool accumulate_output=" << accumulate_output; - std::cout << ",\n\tT padding_value=" << padding_value << ")"; - - std::cout << "\n\n\tVariable im_shape_size: " << im_shape_size << "\n\tVariable output_shape_size: "<::operator()("; + cout << ",\n\tconst T* data_im={"; for (auto i=0; i < im_shape_size; ++i) cout << data_im[i] << + ", "; cout << "}"; + cout << ",\n\tconst int64_t* im_shape={"; for (auto i=0; i < rank; ++i) cout << im_shape[i] << + ", "; cout << "}"; + cout << ",\n\tconst int64_t* output_shape={"; for (auto i=0; i < rank; ++i) cout << output_shape[i] << + ", "; cout << "}"; + cout << ",\n\tint64_t channels_col=" << channels_col; + cout << ",\n\tconst int64_t* kernel_shape={"; for (auto i=0; i < rank; ++i) cout << kernel_shape[i] << + ", "; cout << "}"; + cout << ",\n\tconst int64_t* stride={"; for (auto i=0; i < rank; ++i) cout << stride[i] << + ", "; cout << "}"; + cout << ",\n\tconst int64_t* dilation={"; for (auto i=0; i < rank; ++i) cout << dilation[i] << + ", "; cout << "}"; + cout << ",\n\tconst int64_t* pad={"; for (auto i=0; i < 2*rank; ++i) cout << pad[i] << ", "; + cout << "}"; + cout << ",\n\tptrdiff_t rank=" << rank; + cout << ",\n\tT* data_col= preallocated pointer to write at {"; for (auto i=0; i < output_shape_size; ++i) cout << + data_col[i] << ", "; cout << "}"; + cout << ",\n\tbool accumulate_output=" << accumulate_output; + cout << ",\n\tT padding_value=" << padding_value << ")"; + + cout << "\n\n\tVariable im_shape_size: " << im_shape_size << "\n\tVariable output_shape_size: " << + output_shape_size << "\n\tVariable kernel_shape_size: " << kernel_shape_size << std::endl << std::endl; std::vector d_offset(rank, 0); std::vector d_iter(rank, 0); @@ -430,7 +483,8 @@ void Im2col::operator()( } while (NextPosition(rank, output_shape, d_iter.data())); } // for (int c = 0; c < channels_col; ++c) { - std::cout << "Return void Im2col -> T* data_col={"; for (auto i=0; i < output_shape_size; ++i) std::cout << data_col[i] << ", "; std::cout << "}\n"; + cout << "Return void Im2col -> T* data_col={"; for (auto i=0; i < output_shape_size; ++i) cout << + data_col[i] << ", "; cout << "}\n"; } template struct Im2col; @@ -815,19 +869,26 @@ void Col2imNd(const float* data_col, ptrdiff_t N, float* data_img, CPUMathUtil* context) { - std::cout << "\n\nCalled void Col2imNd("; - std::cout << ",\n\tconst float* data_col={"; for (auto i=0; i < img_size; ++i) std::cout << data_col[i] << ", "; std::cout << "}"; - std::cout << ",\n\tconst int64_t* img_shape={"; for (auto i=0; i < N; ++i) std::cout << img_shape[i] << ", "; std::cout << "}"; - std::cout << ",\n\tconst int64_t* output_shape={"; for (auto i=0; i < N; ++i) std::cout << output_shape[i] << ", "; std::cout << "}"; - std::cout << ",\n\tint64_t channels_col=" << channels_col; - std::cout << ",\n\tint64_t img_size=" << img_size; - std::cout << ",\n\tconst int64_t* kernel_shape={"; for (auto i=0; i < N; ++i) std::cout << kernel_shape[i] << ", "; std::cout << "}"; - std::cout << ",\n\tconst int64_t* stride={"; for (auto i=0; i < N; ++i) std::cout << stride[i] << ", "; std::cout << "}"; - std::cout << ",\n\tconst int64_t* dilation={"; for (auto i=0; i < N; ++i) std::cout << dilation[i] << ", "; std::cout << "}"; - std::cout << ",\n\tconst int64_t* pad={"; for (auto i=0; i < 2*N; ++i) std::cout << pad[i] << ", "; std::cout << "}"; - std::cout << ",\n\tptrdiff_t N=" 
<< N; - std::cout << ",\n\tfloat* data_img= preallocated pointer to save at {"; for (auto i=0; i < img_size; ++i) std::cout << data_img[i] << ", "; std::cout << "}"; - std::cout << ",\n\tCPUMathUtil* context=...)" << std::endl; + cout << "\n\nCalled void Col2imNd("; + cout << ",\n\tconst float* data_col={"; for (auto i=0; i < img_size; ++i) cout << + data_col[i] << ", "; cout << "}"; + cout << ",\n\tconst int64_t* img_shape={"; for (auto i=0; i < N; ++i) cout << img_shape[i] << + ", "; cout << "}"; + cout << ",\n\tconst int64_t* output_shape={"; for (auto i=0; i < N; ++i) cout << output_shape[i] << + ", "; cout << "}"; + cout << ",\n\tint64_t channels_col=" << channels_col; + cout << ",\n\tint64_t img_size=" << img_size; + cout << ",\n\tconst int64_t* kernel_shape={"; for (auto i=0; i < N; ++i) cout << kernel_shape[i] << + ", "; cout << "}"; + cout << ",\n\tconst int64_t* stride={"; for (auto i=0; i < N; ++i) cout << stride[i] << ", "; + cout << "}"; + cout << ",\n\tconst int64_t* dilation={"; for (auto i=0; i < N; ++i) cout << dilation[i] << ", "; + cout << "}"; + cout << ",\n\tconst int64_t* pad={"; for (auto i=0; i < 2*N; ++i) cout << pad[i] << ", "; cout << "}"; + cout << ",\n\tptrdiff_t N=" << N; + cout << ",\n\tfloat* data_img= preallocated pointer to save at {"; for (auto i=0; i < img_size; ++i) cout << + data_img[i] << ", "; cout << "}"; + cout << ",\n\tCPUMathUtil* context=...)" << std::endl; Set(gsl::narrow(img_size), 0, data_img, context); Im2col()( @@ -841,10 +902,10 @@ void Col2imNd(const float* data_col, pad, // const int64_t* pad, N, // ptrdiff_t rank, data_img, // T* data_col, - true // bool accumulate_output, - ); + true); // bool accumulate_output, - std::cout << "Return void Col2imNd --> float* data_img= {"; for (auto i=0; i < img_size; ++i) std::cout << data_img[i] << ", "; std::cout << "}"; + cout << "Return void Col2imNd --> float* data_img= {"; for (auto i=0; i < img_size; ++i) cout << + data_img[i] << ", "; cout << "}"; } #define SPECIALIZED_COPYVECTOR(T) \ diff --git a/onnxruntime/test/contrib_ops/col2im_test.cc b/onnxruntime/test/contrib_ops/col2im_test.cc index 6dacfa9f02301..57638b08bcb89 100644 --- a/onnxruntime/test/contrib_ops/col2im_test.cc +++ b/onnxruntime/test/contrib_ops/col2im_test.cc @@ -10,27 +10,26 @@ namespace onnxruntime { namespace test { template -std::vector _transpose_serialized_vector(std::vector &input, size_t N, size_t C, size_t H, size_t W) -{ +std::vector _transpose_serialized_vector(std::vector &input, size_t N, size_t C, size_t H, size_t W) { size_t input_size = input.size(); - if (input_size == 0){ + if (input_size == 0) { throw std::runtime_error("Invalid input"); } std::vector trans_vec(input); std::cout << "input: ("; - for(size_t i = 0; i < input_size; ++i) + for (size_t i = 0; i < input_size; ++i) std::cout << trans_vec[i] << ", "; std::cout << ")" << std::endl; - for(size_t n = 0; n < N; ++n) - for(size_t c = 0; c < C; ++c) - for(size_t i = 0; i < H; ++i) - for(size_t j = i+1; j < W; ++j) - std::swap(trans_vec[n*(C*H*W) + c*(H*W) + (H*i + j)], trans_vec[n*(C*H*W) + c*(H*W) + (W*j + i)]); + for (size_t n = 0; n < N; ++n) + for (size_t c = 0; c < C; ++c) + for (size_t h = 0; h < H; ++h) + for (size_t w = 0; w < W; ++w) + trans_vec[n * (C * H * W) + c * (H * W) + (h + H * w)] = input[n * (C * H * W) + c * (H * W) + (w + W * h)]; std::cout << "trans_vec: ("; - for(size_t i = 0; i < input_size; ++i) + for (size_t i = 0; i < input_size; ++i) std::cout << trans_vec[i] << ", "; std::cout << ")" << std::endl; @@ -56,6 +55,25 @@ 
TEST(Col2ImContribOpTest, simple4dNCHW) { test.Run(); } +TEST(Col2ImContribOpTest, with2Images3channelsNonSquare4dNCHW) { + OpTester test("Col2Im", 1, kMSDomain); + + test.AddAttribute("strides", std::vector{1, 1}); + test.AddAttribute("dilations", std::vector{1, 1}); + test.AddAttribute("pads", std::vector{0, 0, 0, 0}); + + std::vector input(120); + std::vector output(120); + std::iota(output.begin(), output.end(), 1); + input = _transpose_serialized_vector(output, 2, 3, 4, 5); + test.AddInput("input", {2, 15, 4}, input); + test.AddInput("image_shape", {2}, std::vector{4, 5}); + test.AddInput("block_shape", {2}, std::vector{1, 5}); + + test.AddOutput("output", {2, 3, 4, 5}, output); + test.Run(); +} + TEST(Col2ImContribOpTest, with3channels4dNCHW) { OpTester test("Col2Im", 1, kMSDomain); @@ -108,11 +126,40 @@ TEST(Col2ImContribOpTest, simple5dNCHWD) { test.AddInput("input", {1, 5, 5}, input); test.AddInput("image_shape", {3}, std::vector{1, 5, 5}); test.AddInput("block_shape", {3}, std::vector{1, 1, 5}); - test.AddOutput("output", {1, 1, 1, 5, 5}, output); test.Run(); } +TEST(Im2ColContribOpTest, simple) { + std::vector input(24); + std::vector expected_output(24); + std::iota(input.begin(), input.end(), 1); + expected_output = {1, 5, 9, 2, 6, 10, 3, 7, 11, 4, 8, 12, 13, 17, 21, 14, 18, 22, 15, 19, 23, 16, 20, 24}; + float* actual_output = new float(24); + std::cout << "\nExpected output --> "; for (auto i=0; i < 24; ++i) std::cout << expected_output[i] << ", "; + std::cout << ")" << std::endl; + math::Im2col()( + input.data(), + int64_t(2), + int64_t(3), + int64_t(4), + int64_t(1), + int64_t(4), + int64_t(1), + int64_t(1), + int64_t(0), + int64_t(0), + int64_t(0), + int64_t(0), + int64_t(1), + int64_t(1), + actual_output, + 0.); + + std::cout << "\nActual output --> "; for (auto i=0; i < 24; ++i) std::cout << actual_output[i] << + ", "; std::cout << ")" << std::endl; + delete[] actual_output; +} } // namespace test } // namespace onnxruntime diff --git a/onnxruntime/test/python/contrib_ops/onnx_test_col2im.py b/onnxruntime/test/python/contrib_ops/onnx_test_col2im.py index 97269d895a125..31c5f129fad6b 100644 --- a/onnxruntime/test/python/contrib_ops/onnx_test_col2im.py +++ b/onnxruntime/test/python/contrib_ops/onnx_test_col2im.py @@ -12,7 +12,7 @@ class ONNXReferenceImplementationTest(unittest.TestCase): def test_col2im(self) -> None: - input = np.array( + inputs = np.array( [ [ [1.0, 6.0, 11.0, 16.0, 21.0], # (1, 5, 5) @@ -45,7 +45,7 @@ def test_col2im(self) -> None: expect( node, - inputs=[input, image_shape, block_shape], + inputs=[inputs, image_shape, block_shape], outputs=[col2im_reference_implementation], name="test_col2im", ) From 9ed857f005d5aa1cd0c0db00561e4eeceb8b81a9 Mon Sep 17 00:00:00 2001 From: Thiago Crepaldi Date: Wed, 10 Aug 2022 20:32:12 -0400 Subject: [PATCH 10/30] Add dilated kernel shape as per Rama review --- .../core/providers/cpu/tensor/col2im.cc | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/onnxruntime/core/providers/cpu/tensor/col2im.cc b/onnxruntime/core/providers/cpu/tensor/col2im.cc index 51af71364ca15..0c9b2a30feca9 100644 --- a/onnxruntime/core/providers/cpu/tensor/col2im.cc +++ b/onnxruntime/core/providers/cpu/tensor/col2im.cc @@ -28,7 +28,14 @@ Status Col2Im::Compute(OpKernelContext* context) const { const auto* col_input = context->Input(0); const auto* image_shape = context->Input(1); const auto* kernel_shape = context->Input(2); + + // TODO(rama): Kernel with dilation + TensorShapeVector 
dilated_kernel_shape_dims; std::cout << "Status Col2Im::Compute(OpKernelContext* context)" << std::endl; + for (auto i=0; i < kernel_shape->Shape().Size(); ++i) { + dilated_kernel_shape_dims[i] = col2im_attrs_.dilations[i] * (kernel_shape->Data()[i] - 1) + 1; + } + TensorShape dilated_kernel_shape(dilated_kernel_shape_dims); const T* col_input_data = col_input->template Data(); TensorShape col_input_shape = col_input->Shape(); @@ -40,7 +47,7 @@ Status Col2Im::Compute(OpKernelContext* context) const { for (auto i=0; i < image_shape->Shape().Size(); ++i) { ++kernel_shape_rank; image_shape_size *= image_shape->Data()[i]; - kernel_shape_size *= kernel_shape->Data()[i]; + kernel_shape_size *= dilated_kernel_shape_dims[i]; } const int64_t C = col_input_shape[1] / kernel_shape_size; const int64_t col_output_stride = col_input_shape.SizeFromDimension(1); @@ -54,9 +61,6 @@ Status Col2Im::Compute(OpKernelContext* context) const { TensorShape Yshape(Y_dims); Tensor* Y = context->Output(0, Yshape); T* Ydata = Y->template MutableData(); - for (auto i=0; i < Yshape.Size(); ++i) - Ydata[i] = -1; // just for debug (to know what has been written to Ydata in the end) - // const int64_t Y_offset = Yshape.Size() / Yshape[0]; std::cout << "\n\tInput 0: col_input = ("; for (auto i=0; i < Yshape.Size(); ++i) std::cout << col_input_data[i] << ", "; std::cout << ") with shape "<< col_input_shape << std::endl; @@ -73,9 +77,10 @@ Status Col2Im::Compute(OpKernelContext* context) const { std::cout << "\tVariable C: " << C << std::endl; std::cout << "\tVariable col_input_N = " << col_input_N << std::endl; - std::cout << "\tVariable image_shape_size: " << image_shape_size << std::endl; - std::cout << "\tVariable kernel_shape_size: " << kernel_shape_size << std::endl; - + std::cout << "\tVariable image_shape_size: " << image_shape_size << std::endl; + std::cout << "\tVariable kernel_shape_size: " << kernel_shape_size << std::endl; + std::cout << "\tVariable: dilated_kernel_shape = ("; for (auto i=0; i < dilated_kernel_shape.Size(); ++i) std::cout << + dilated_kernel_shape[i] << ", "; std::cout << ")" << std::endl; std::cout << "\n\tStatus Col2Im::Compute() --> math::Col2imNd<>()" << std::endl; assert(image_shape_size == Y_offset); // just for temp debug @@ -123,11 +128,11 @@ Status Col2Im::Compute(OpKernelContext* context) const { // 2.20182e+24, 4, -2.56655e+29, 5.08551e+31, -1.05888e+29, 1.51107e+29, 5, 10, 15, 20, 7.2793e+31} // that is very similar to input, but with some rounded numbers and corrupted "25" value image_shape_size, // int64_t img_size, - kernel_shape->Data(), // const int64_t* kernel_shape, + dilated_kernel_shape_dims.data(), // const int64_t* kernel_shape, col2im_attrs_.strides.data(), // const int64_t* stride, col2im_attrs_.dilations.data(), // const int64_t* dilation, col2im_attrs_.pads.data(), // const int64_t* pad, - kernel_shape->Shape().Size(), // ptrdiff_t N, --> #spatial_dims? + dilated_kernel_shape.Size(), // ptrdiff_t N, --> #spatial_dims? 
Ydata + image_id * col_input_stride, // T* data_img, &CPUMathUtil::Instance()); // Provider* provider } From a09c151bf053948b5073a16969dc46bd12038a8d Mon Sep 17 00:00:00 2001 From: Thiago Crepaldi Date: Thu, 11 Aug 2022 15:17:36 -0400 Subject: [PATCH 11/30] Add support to dilation/padding/strides --- .../core/providers/cpu/tensor/col2im.cc | 31 +++++++-------- onnxruntime/test/contrib_ops/col2im_test.cc | 39 +++++++++++++++++++ 2 files changed, 53 insertions(+), 17 deletions(-) diff --git a/onnxruntime/core/providers/cpu/tensor/col2im.cc b/onnxruntime/core/providers/cpu/tensor/col2im.cc index 0c9b2a30feca9..08c315193479d 100644 --- a/onnxruntime/core/providers/cpu/tensor/col2im.cc +++ b/onnxruntime/core/providers/cpu/tensor/col2im.cc @@ -32,10 +32,6 @@ Status Col2Im::Compute(OpKernelContext* context) const { // TODO(rama): Kernel with dilation TensorShapeVector dilated_kernel_shape_dims; std::cout << "Status Col2Im::Compute(OpKernelContext* context)" << std::endl; - for (auto i=0; i < kernel_shape->Shape().Size(); ++i) { - dilated_kernel_shape_dims[i] = col2im_attrs_.dilations[i] * (kernel_shape->Data()[i] - 1) + 1; - } - TensorShape dilated_kernel_shape(dilated_kernel_shape_dims); const T* col_input_data = col_input->template Data(); TensorShape col_input_shape = col_input->Shape(); @@ -47,8 +43,10 @@ Status Col2Im::Compute(OpKernelContext* context) const { for (auto i=0; i < image_shape->Shape().Size(); ++i) { ++kernel_shape_rank; image_shape_size *= image_shape->Data()[i]; - kernel_shape_size *= dilated_kernel_shape_dims[i]; + kernel_shape_size *= kernel_shape->Data()[i]; + dilated_kernel_shape_dims.push_back(col2im_attrs_.dilations[i] * (kernel_shape->Data()[i] - 1) + 1); } + TensorShape dilated_kernel_shape(dilated_kernel_shape_dims); const int64_t C = col_input_shape[1] / kernel_shape_size; const int64_t col_output_stride = col_input_shape.SizeFromDimension(1); const int64_t col_input_stride = C * image_shape_size; @@ -79,12 +77,10 @@ Status Col2Im::Compute(OpKernelContext* context) const { std::cout << "\tVariable col_input_N = " << col_input_N << std::endl; std::cout << "\tVariable image_shape_size: " << image_shape_size << std::endl; std::cout << "\tVariable kernel_shape_size: " << kernel_shape_size << std::endl; - std::cout << "\tVariable: dilated_kernel_shape = ("; for (auto i=0; i < dilated_kernel_shape.Size(); ++i) std::cout << + std::cout << "\tVariable: dilated_kernel_shape = ("; for (size_t i=0; i < dilated_kernel_shape.NumDimensions(); ++i) std::cout << dilated_kernel_shape[i] << ", "; std::cout << ")" << std::endl; std::cout << "\n\tStatus Col2Im::Compute() --> math::Col2imNd<>()" << std::endl; - assert(image_shape_size == Y_offset); // just for temp debug - for (auto image_id = 0; image_id < col_input_N; ++image_id) { std::cout << "Image " << image_id+1 << " out of "<< col_input_N << std::endl; if (image_shape->Shape()[0] == 2) { @@ -112,27 +108,28 @@ Status Col2Im::Compute(OpKernelContext* context) const { col_input_data + image_id * col_output_stride, // const T* data_col, image_shape->Data(), // const int64_t* img_shape, Yshape.Slice(2).GetDims().data(), // const int64_t* output_shape, - // col_input_shape[1], // int64_t channels_col, + // col_input_shape[1], // int64_t channels_col, // leads to output - // {1, -nan, 11, 16, 2.58141e+34, 2, 8.80295e+34, 12, 17, 22, 3, 4.59718e+24, 13, 18, 2.85144e+34, 4, - // -443.863, 14, -nan, 24, 5, 10, 15, 20, 25} + // {1, 6, 11, 16, 21, 2, 7, 12, 17, 22, 3, 8, 13, 18, 23, + // 4, 9, 14, 3.13005e+12, 1.88865e+31, 5, 10, 15, 20, 
25,} // that is similar to input with some spots with random values - C, // int64_t channels_col, + C, // int64_t channels_col, // leads to output {1, 6, 11, 16, 21, 2, 7, 12, 17, 22, 3, 8, 13, 18, 23, 4, 9, 14, 19, 24, 5, 10, 15, 20, 25, } // that is identical to input - // col_input_shape[2], // int64_t channels_col, + // col_input_shape[2], // int64_t channels_col, // leads to output - // {1, 6, 1.92869e+31, 4.84145e+30, 1.88774e+31, 2, 7, 12, 17, 22, 3, 8, 1.86549e+31, 3.40686e+25, - // 2.20182e+24, 4, -2.56655e+29, 5.08551e+31, -1.05888e+29, 1.51107e+29, 5, 10, 15, 20, 7.2793e+31} + // {1, 6, 1.89906e+28, 7.00716e+22, 8.96572e+22, 2, 7, 6.09175e+22, 1.81786e+31, 3.50226e+29, 3, 8, + // 1.8001e+14, 2.67907e+20, 2.79522e+20, 4, 1.79858e+14, 4.74181e+30, 7.40484e+28, 1.80733e+28, 5, + // 10, 1.42889e+19, 6635.59, 2.46452e+11} // that is very similar to input, but with some rounded numbers and corrupted "25" value image_shape_size, // int64_t img_size, - dilated_kernel_shape_dims.data(), // const int64_t* kernel_shape, + dilated_kernel_shape.GetDims().data(), // const int64_t* kernel_shape, col2im_attrs_.strides.data(), // const int64_t* stride, col2im_attrs_.dilations.data(), // const int64_t* dilation, col2im_attrs_.pads.data(), // const int64_t* pad, - dilated_kernel_shape.Size(), // ptrdiff_t N, --> #spatial_dims? + image_shape->Shape().Size(), // ptrdiff_t N, --> #spatial_dims? Ydata + image_id * col_input_stride, // T* data_img, &CPUMathUtil::Instance()); // Provider* provider } diff --git a/onnxruntime/test/contrib_ops/col2im_test.cc b/onnxruntime/test/contrib_ops/col2im_test.cc index 57638b08bcb89..0e542aff9ac68 100644 --- a/onnxruntime/test/contrib_ops/col2im_test.cc +++ b/onnxruntime/test/contrib_ops/col2im_test.cc @@ -74,6 +74,45 @@ TEST(Col2ImContribOpTest, with2Images3channelsNonSquare4dNCHW) { test.Run(); } +TEST(Col2ImContribOpTest, with2Images2channelsNonSquareDilationPadStride4dNCHW) { + OpTester test("Col2Im", 1, kMSDomain); + + test.AddAttribute("strides", std::vector{2, 2}); + test.AddAttribute("dilations", std::vector{2, 2}); + test.AddAttribute("pads", std::vector{2, 2, 2, 2}); + + std::vector input{ 0., 0., 0., 0., 0., 1., 3., 5., 0., 11., 13., 15., 0., 0., 0., 0., + 0., 0., 0., 0., 1., 3., 5., 0., 11., 13., 15., 0., 0., 0., 0., 0., + 0., 0., 0., 0., 0., 21., 23., 25., 0., 31., 33., 35., 0., 0., 0., 0., + 0., 0., 0., 0., 21., 23., 25., 0., 31., 33., 35., 0., 0., 0., 0., 0., + 0., 0., 0., 0., 0., 41., 43., 45., 0., 51., 53., 55., 0., 0., 0., 0., + 0., 0., 0., 0., 41., 43., 45., 0., 51., 53., 55., 0., 0., 0., 0., 0., + 0., 0., 0., 0., 0., 61., 63., 65., 0., 71., 73., 75., 0., 0., 0., 0., + 0., 0., 0., 0., 61., 63., 65., 0., 71., 73., 75., 0., 0., 0., 0., 0.}; + std::vector output { 2., 0., 6., 0., 10., + 0., 0., 0., 0., 0., + 22., 0., 26., 0., 30., + 0., 0., 0., 0., 0., + 42., 0., 46., 0., 50., + 0., 0., 0., 0., 0., + 62., 0., 66., 0., 70., + 0., 0., 0., 0., 0., + 82., 0., 86., 0., 90., + 0., 0., 0., 0., 0., + 102., 0., 106., 0., 110., + 0., 0., 0., 0., 0., + 122., 0., 126., 0., 130., + 0., 0., 0., 0., 0., + 142., 0., 146., 0., 150., + 0., 0., 0., 0., 0.}; + test.AddInput("input", {2, 4, 16}, input); + test.AddInput("image_shape", {2}, std::vector{4, 5}); + test.AddInput("block_shape", {2}, std::vector{1, 2}); + + test.AddOutput("output", {2, 2, 4, 5}, output); + test.Run(); +} + TEST(Col2ImContribOpTest, with3channels4dNCHW) { OpTester test("Col2Im", 1, kMSDomain); From f65b85f66299c777710d17176804ac01664cd4bb Mon Sep 17 00:00:00 2001 From: Thiago Crepaldi Date: Thu, 
11 Aug 2022 17:41:32 -0400 Subject: [PATCH 12/30] Code cleanup --- .../core/graph/contrib_ops/contrib_defs.cc | 6 +- .../core/providers/cpu/tensor/col2im.cc | 113 ++++---------- .../providers/cpu/tensor/col2im_attributes.h | 5 - onnxruntime/core/util/math_cpu.cc | 146 +++--------------- .../tools/pytorch_export_contrib_ops.py | 2 + onnxruntime/test/contrib_ops/col2im_test.cc | 43 +++--- 6 files changed, 77 insertions(+), 238 deletions(-) diff --git a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc index cc992a7dfbdbe..715eb9cda97b5 100644 --- a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc +++ b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc @@ -917,7 +917,7 @@ void col2imShapeInference(InferenceContext& ctx) { if (ctx.getInputType(1)->tensor_type().shape().dim_size() != 1) { fail_shape_inference("image_shape tensor must have rank 1."); } - size_t n_input_dims = ctx.getInputType(1)->tensor_type().shape().dim(0).dim_value(); + size_t n_input_dims = static_cast(ctx.getInputType(1)->tensor_type().shape().dim(0).dim_value()); std::vector image_shape = {}; const TensorProto* image_shape_data = ctx.getInputData(1); if (image_shape_data) { @@ -1069,7 +1069,9 @@ ONNX_MS_OPERATOR_SET_SCHEMA(Col2Im, 1, "The shape of the block to apply on the input." "This is a 1-dim tensor of size of at least 2, containing the value [H_block, W_block] " " for a 2-D image or [dim_b1, dim_b2, ..., dim_bN] for a N-D block." - "Dilations, pads and strides are applied to block_shape under the hood.", + "Dilations, pads and strides are applied to block_shape under the hood." + "The kernel window start at the top-left of the block and slides to the right and down," + "similarly to how Convolution kernels do.", "tensor(int64)", OpSchema::Single, true, diff --git a/onnxruntime/core/providers/cpu/tensor/col2im.cc b/onnxruntime/core/providers/cpu/tensor/col2im.cc index 08c315193479d..3a75910f9ff6c 100644 --- a/onnxruntime/core/providers/cpu/tensor/col2im.cc +++ b/onnxruntime/core/providers/cpu/tensor/col2im.cc @@ -1,7 +1,6 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
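// ---------------------------------------------------------------------------
// Illustrative sketch (hypothetical helper, not part of this patch): the
// cleaned-up Compute() in this hunk recovers C from the col buffer and builds
// an "adjusted" (dilated) kernel extent per spatial axis. A standalone version
// of that arithmetic, under the same assumptions, looks like this:
#include <cstddef>
#include <cstdint>
#include <vector>

struct Col2ImShapes {
  int64_t C;                             // output channels
  std::vector<int64_t> adjusted_kernel;  // dilation * (k - 1) + 1 per axis
  std::vector<int64_t> block_positions;  // sliding positions per axis (unit stride, no padding)
};

inline Col2ImShapes ComputeCol2ImShapes(int64_t col_channels,  // col_shape[1] == C * prod(kernel_shape)
                                        const std::vector<int64_t>& image_shape,
                                        const std::vector<int64_t>& kernel_shape,
                                        const std::vector<int64_t>& dilations) {
  Col2ImShapes s{};
  int64_t kernel_size = 1;
  for (size_t i = 0; i < kernel_shape.size(); ++i) {
    kernel_size *= kernel_shape[i];
    const int64_t adjusted = dilations[i] * (kernel_shape[i] - 1) + 1;
    s.adjusted_kernel.push_back(adjusted);
    s.block_positions.push_back(image_shape[i] - adjusted + 1);
  }
  s.C = col_channels / kernel_size;
  return s;
}
// Example: col_channels=15, image_shape={5,5}, kernel_shape={1,5}, dilations={1,1}
// gives C=3, adjusted_kernel={1,5}, block_positions={5,1}, matching the
// three-channel 5x5 tests in this series.
// ---------------------------------------------------------------------------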
-#include #include "core/providers/cpu/tensor/col2im.h" #include "core/framework/element_type_lists.h" @@ -25,68 +24,39 @@ REGISTER_KERNEL_TYPED(float) template Status Col2Im::Compute(OpKernelContext* context) const { - const auto* col_input = context->Input(0); + const auto* col_tensor = context->Input(0); const auto* image_shape = context->Input(1); const auto* kernel_shape = context->Input(2); - // TODO(rama): Kernel with dilation - TensorShapeVector dilated_kernel_shape_dims; - std::cout << "Status Col2Im::Compute(OpKernelContext* context)" << std::endl; - - const T* col_input_data = col_input->template Data(); - TensorShape col_input_shape = col_input->Shape(); - const auto col_input_N = col_input_shape[0]; - int64_t image_shape_size = 1; int64_t kernel_shape_size = 1; - int64_t kernel_shape_rank = 0; + TensorShapeVector adjusted_kernel_shape_dims; for (auto i=0; i < image_shape->Shape().Size(); ++i) { - ++kernel_shape_rank; image_shape_size *= image_shape->Data()[i]; kernel_shape_size *= kernel_shape->Data()[i]; - dilated_kernel_shape_dims.push_back(col2im_attrs_.dilations[i] * (kernel_shape->Data()[i] - 1) + 1); + adjusted_kernel_shape_dims.push_back(col2im_attrs_.dilations[i] * (kernel_shape->Data()[i] - 1) + 1); } - TensorShape dilated_kernel_shape(dilated_kernel_shape_dims); - const int64_t C = col_input_shape[1] / kernel_shape_size; - const int64_t col_output_stride = col_input_shape.SizeFromDimension(1); - const int64_t col_input_stride = C * image_shape_size; - - TensorShapeVector Y_dims; - Y_dims.insert(Y_dims.begin(), {col_input_N, C}); + TensorShape col_shape = col_tensor->Shape(); + const auto N = col_shape[0]; + const int64_t C = col_shape[1] / kernel_shape_size; + const int64_t col_stride = C * image_shape_size; + TensorShape adjusted_kernel_shape(adjusted_kernel_shape_dims); + const int64_t col_data_stride = col_shape.SizeFromDimension(1); + + TensorShapeVector batched_image_shape_dims, adjusted_image_shape_dims; + batched_image_shape_dims.insert(batched_image_shape_dims.begin(), {N, C}); for (auto i=0; i < image_shape->Shape()[0]; ++i) { - Y_dims.push_back(image_shape->Data()[i]); + batched_image_shape_dims.push_back(image_shape->Data()[i]); + adjusted_image_shape_dims.push_back(image_shape->Data()[i]-adjusted_kernel_shape[i]+1); } - TensorShape Yshape(Y_dims); - Tensor* Y = context->Output(0, Yshape); - T* Ydata = Y->template MutableData(); - - std::cout << "\n\tInput 0: col_input = ("; for (auto i=0; i < Yshape.Size(); ++i) std::cout << - col_input_data[i] << ", "; std::cout << ") with shape "<< col_input_shape << std::endl; - std::cout << "\tInput 1: image_shape = ("; for (auto i=0; i < image_shape->Shape().Size(); ++i) std::cout << - image_shape->Data()[i] << ", "; std::cout << ")" << std::endl; - std::cout << "\tInput 2: kernel_shape = ("; for (auto i=0; i < kernel_shape->Shape().Size(); ++i) std::cout << - kernel_shape->Data()[i] << ", "; std::cout << ")" << std::endl; - std::cout << "\tAttribute strides = ("; for (size_t i=0; i < col2im_attrs_.strides.size(); ++i) std::cout << - col2im_attrs_.strides[i] << ", "; std::cout << ")"<< std::endl; - std::cout << "\tAttribute dilations = ("; for (size_t i=0; i < col2im_attrs_.dilations.size(); ++i) std::cout << - col2im_attrs_.dilations[i] << ", "; std::cout << ")"<< std::endl; - std::cout << "\tAttribute pads = ("; for (size_t i=0; i < col2im_attrs_.pads.size(); ++i) std::cout << - col2im_attrs_.pads[i] << ", "; std::cout << ")"<< std::endl; - - std::cout << "\tVariable C: " << C << std::endl; - std::cout << 
"\tVariable col_input_N = " << col_input_N << std::endl; - std::cout << "\tVariable image_shape_size: " << image_shape_size << std::endl; - std::cout << "\tVariable kernel_shape_size: " << kernel_shape_size << std::endl; - std::cout << "\tVariable: dilated_kernel_shape = ("; for (size_t i=0; i < dilated_kernel_shape.NumDimensions(); ++i) std::cout << - dilated_kernel_shape[i] << ", "; std::cout << ")" << std::endl; - std::cout << "\n\tStatus Col2Im::Compute() --> math::Col2imNd<>()" << std::endl; + TensorShape batched_image_shape(batched_image_shape_dims), adjusted_image_shape(adjusted_image_shape_dims); + T* image_data = context->Output(0, batched_image_shape)->template MutableData(); - for (auto image_id = 0; image_id < col_input_N; ++image_id) { - std::cout << "Image " << image_id+1 << " out of "<< col_input_N << std::endl; + const T* col_data = col_tensor->template Data(); + for (auto image_id = 0; image_id < N; ++image_id) { if (image_shape->Shape()[0] == 2) { - std::cout << "image_shape->Shape()[0] == 2 --> Col2Im" << std::endl; math::Col2im( - col_input_data + image_id * col_output_stride, + col_data + image_id * col_data_stride, C, image_shape->Data()[0], image_shape->Data()[1], @@ -100,43 +70,24 @@ Status Col2Im::Compute(OpKernelContext* context) const { col2im_attrs_.pads[3], col2im_attrs_.strides[0], col2im_attrs_.strides[1], - Ydata + image_id * col_input_stride, + image_data + image_id * col_stride, &CPUMathUtil::Instance()); } else { - std::cout << "image_shape->Shape()[0] != 2 --> Col2ImNd (nd=" << image_shape->Shape()[0] << ") " << std::endl; math::Col2imNd( - col_input_data + image_id * col_output_stride, // const T* data_col, - image_shape->Data(), // const int64_t* img_shape, - Yshape.Slice(2).GetDims().data(), // const int64_t* output_shape, - // col_input_shape[1], // int64_t channels_col, - // leads to output - // {1, 6, 11, 16, 21, 2, 7, 12, 17, 22, 3, 8, 13, 18, 23, - // 4, 9, 14, 3.13005e+12, 1.88865e+31, 5, 10, 15, 20, 25,} - // that is similar to input with some spots with random values - - C, // int64_t channels_col, - // leads to output {1, 6, 11, 16, 21, 2, 7, 12, 17, 22, 3, 8, 13, 18, 23, 4, 9, 14, 19, 24, 5, 10, 15, 20, 25, } - // that is identical to input - - // col_input_shape[2], // int64_t channels_col, - // leads to output - // {1, 6, 1.89906e+28, 7.00716e+22, 8.96572e+22, 2, 7, 6.09175e+22, 1.81786e+31, 3.50226e+29, 3, 8, - // 1.8001e+14, 2.67907e+20, 2.79522e+20, 4, 1.79858e+14, 4.74181e+30, 7.40484e+28, 1.80733e+28, 5, - // 10, 1.42889e+19, 6635.59, 2.46452e+11} - // that is very similar to input, but with some rounded numbers and corrupted "25" value - image_shape_size, // int64_t img_size, - dilated_kernel_shape.GetDims().data(), // const int64_t* kernel_shape, - col2im_attrs_.strides.data(), // const int64_t* stride, - col2im_attrs_.dilations.data(), // const int64_t* dilation, - col2im_attrs_.pads.data(), // const int64_t* pad, - image_shape->Shape().Size(), // ptrdiff_t N, --> #spatial_dims? 
- Ydata + image_id * col_input_stride, // T* data_img, - &CPUMathUtil::Instance()); // Provider* provider + col_data + image_id * col_data_stride, + image_shape->Data(), + adjusted_image_shape.GetDims().data(), + kernel_shape_size * C, + image_shape_size, + adjusted_kernel_shape.GetDims().data(), + col2im_attrs_.strides.data(), + col2im_attrs_.dilations.data(), + col2im_attrs_.pads.data(), + image_shape->Shape().Size(), + image_data + image_id * col_stride, + &CPUMathUtil::Instance()); } } - std::cout << "\n\n Return Col2Im::Compute() --> "; for (auto i=0; i < Yshape.Size(); ++i) std::cout << - Ydata[i] << ", "; std::cout << ") with shape " << Yshape << std::endl << std::endl; - return Status::OK(); } diff --git a/onnxruntime/core/providers/cpu/tensor/col2im_attributes.h b/onnxruntime/core/providers/cpu/tensor/col2im_attributes.h index 9639718db5ecf..eaef183334ba6 100644 --- a/onnxruntime/core/providers/cpu/tensor/col2im_attributes.h +++ b/onnxruntime/core/providers/cpu/tensor/col2im_attributes.h @@ -34,15 +34,10 @@ struct Col2ImAttributes { explicit Col2ImAttributes(const OpKernelInfo& info) { auto status = info.GetAttrs("strides", strides); - // ORT_ENFORCE(status.IsOK()); - gsl::span pads_span; status = info.GetAttrsAsSpan("pads", pads_span); - // ORT_ENFORCE(status.IsOK()); pads.assign(pads_span.cbegin(), pads_span.cend()); - status = info.GetAttrs("dilations", dilations); - // ORT_ENFORCE(status.IsOK()); } ~Col2ImAttributes() = default; diff --git a/onnxruntime/core/util/math_cpu.cc b/onnxruntime/core/util/math_cpu.cc index 3b5fa0a549af9..164e88573c4cb 100644 --- a/onnxruntime/core/util/math_cpu.cc +++ b/onnxruntime/core/util/math_cpu.cc @@ -31,13 +31,10 @@ #pragma GCC diagnostic pop #endif using onnxruntime::concurrency::ThreadPool; -#include namespace onnxruntime { namespace math { -using std::cout; - // MatMul implementation purely based on Eigen. 
#define EIGEN_MATMUL_FUNCTION(T) \ template <> \ @@ -310,56 +307,23 @@ void Im2col::operator()( int64_t stride_w, T* data_col, T padding_value) { - - cout << "void Im2col::operator()("; - cout << "\n\tconst T* data_im=" << data_im; - cout << "\n\tint64_t channels=" << channels; - cout << "\n\tint64_t heigh=" << height; - cout << "\n\tint64_t width=" << width; - cout << "\n\tint64_t kernel_h=" << kernel_h; - cout << "\n\tint64_t kernel_w=" << kernel_w; - cout << "\n\tint64_t dilation_h=" << dilation_h; - cout << "\n\tint64_t dilation_w=" << dilation_w; - cout << "\n\tint64_t pad_t=" << pad_t; - cout << "\n\tint64_t pad_l=" << pad_l; - cout << "\n\tint64_t pad_b=" << pad_b; - cout << "\n\tint64_t pad_r=" << pad_r; - cout << "\n\tint64_t stride_h=" << stride_h; - cout << "\n\tint64_t stride_w=" << stride_w; - cout << "\n\tT* data_col=" << data_col; - cout << "\n\tT padding_value=" << padding_value << ")" << std::endl; - - int data_col_offset = 0; const int64_t output_h = (height + pad_b + pad_t - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; const int64_t output_w = (width + pad_l + pad_r - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; - cout << "output_h: " << output_h << std::endl; - cout << "output_w: " << output_w << std::endl; + // From Intel, https://github.com/BVLC/caffe/pull/3536 int64_t channel_size = height * width; - cout << "channel_size (height * width): " << channel_size << std::endl; for (int64_t channel = channels; channel--; data_im += channel_size) { - cout << "for channel= " << channel << "/channel_size=" << channel_size << std::endl; for (int64_t kernel_row = 0; kernel_row < kernel_h; kernel_row++) { - cout << "\tfor kernel_row= " << kernel_row << "/kernel_h=" << kernel_h << std::endl; for (int64_t kernel_col = 0; kernel_col < kernel_w; kernel_col++) { - cout << "\t\tfor kernel_col= " << kernel_col << "/kernel_w=" << kernel_w << std::endl; int64_t input_row = -pad_t + kernel_row * dilation_h; - cout << "\t\t\tinput_row= " << input_row << std::endl; for (int64_t output_rows = output_h; output_rows; output_rows--) { - cout << "\t\t\tfor output_rows= " << output_rows << "/output_h=" << output_h << std::endl; if (!is_a_ge_zero_and_a_lt_b(input_row, height)) { std::fill_n(data_col, output_w, padding_value); - cout << "\t\t\t\t(input_row out of bounds) data_col["<< data_col_offset << "] = " << padding_value << - " * " << output_w << " times"<< std::endl; - data_col_offset += output_w; data_col += output_w; } else { int64_t input_col = -pad_l + kernel_col * dilation_w; - cout << "\t\t\t\tinput_col= " << input_col << std::endl; - cout << "\t\t\t\tinput_pos= " << input_row * width + input_col << std::endl; const T* rdptr = data_im + input_row * width + input_col; for (int64_t i = 0; i < output_w;) { - cout << "\t\t\t\tfor i= " << i << "/output_w=" << output_w << std::endl; int64_t output_handled = 1; if (is_a_ge_zero_and_a_lt_b(input_col, width)) { if (stride_w == 1) { @@ -367,10 +331,6 @@ void Im2col::operator()( // and the number of output elements to produce. output_handled = std::min(width - input_col, output_w - i); data_col = std::copy_n(&rdptr[i], static_cast(output_handled), data_col); - cout << "\t\t\t\t\tdata_col["<< data_col_offset << "] = " << rdptr[i] << std::endl; - data_col_offset += output_handled; - // cout << "\t\t\t\t\t(stride 1) Copied " << output_handled << - // " element(s) from data_im to data_col: " << rdptr[i] << std::endl; } else if (stride_w == 2) { // Same as above except using the number of strided input elements. 
output_handled = std::min((width - input_col + 1) / 2, output_w - i); @@ -379,16 +339,11 @@ void Im2col::operator()( *(data_col++) = *local_rdptr; local_rdptr += 2; } - cout << "\t\t\t\t\t(stride 2) Copy " << output_handled - << " elements from data_im to data_col " << std::endl; } else { *(data_col++) = rdptr[i * stride_w]; - cout << "\t\t\t\t\t(stride >2) Copy 1 element from data_im to data_col " << std::endl; } } else { *(data_col++) = padding_value; - cout << "\t\t\t\t\t(input_col out of bounds) fill data_col with 1 padding_value= " << - padding_value << std::endl; } input_col += output_handled * stride_w; i += output_handled; @@ -415,36 +370,7 @@ void Im2col::operator()( T* data_col, bool accumulate_output, T padding_value) { - - int64_t im_shape_size = std::accumulate(im_shape, im_shape + rank, 1LL, std::multiplies()); - int64_t output_shape_size = std::accumulate(output_shape, output_shape + rank, 1LL, std::multiplies()); - int64_t kernel_shape_size = std::accumulate(kernel_shape, kernel_shape + rank, 1LL, std::multiplies()); - - cout << "\n\nCalled void Im2col::operator()("; - cout << ",\n\tconst T* data_im={"; for (auto i=0; i < im_shape_size; ++i) cout << data_im[i] << - ", "; cout << "}"; - cout << ",\n\tconst int64_t* im_shape={"; for (auto i=0; i < rank; ++i) cout << im_shape[i] << - ", "; cout << "}"; - cout << ",\n\tconst int64_t* output_shape={"; for (auto i=0; i < rank; ++i) cout << output_shape[i] << - ", "; cout << "}"; - cout << ",\n\tint64_t channels_col=" << channels_col; - cout << ",\n\tconst int64_t* kernel_shape={"; for (auto i=0; i < rank; ++i) cout << kernel_shape[i] << - ", "; cout << "}"; - cout << ",\n\tconst int64_t* stride={"; for (auto i=0; i < rank; ++i) cout << stride[i] << - ", "; cout << "}"; - cout << ",\n\tconst int64_t* dilation={"; for (auto i=0; i < rank; ++i) cout << dilation[i] << - ", "; cout << "}"; - cout << ",\n\tconst int64_t* pad={"; for (auto i=0; i < 2*rank; ++i) cout << pad[i] << ", "; - cout << "}"; - cout << ",\n\tptrdiff_t rank=" << rank; - cout << ",\n\tT* data_col= preallocated pointer to write at {"; for (auto i=0; i < output_shape_size; ++i) cout << - data_col[i] << ", "; cout << "}"; - cout << ",\n\tbool accumulate_output=" << accumulate_output; - cout << ",\n\tT padding_value=" << padding_value << ")"; - - cout << "\n\n\tVariable im_shape_size: " << im_shape_size << "\n\tVariable output_shape_size: " << - output_shape_size << "\n\tVariable kernel_shape_size: " << kernel_shape_size << std::endl << std::endl; - + int64_t kernel_size = std::accumulate(kernel_shape, kernel_shape + rank, 1LL, std::multiplies()); std::vector d_offset(rank, 0); std::vector d_iter(rank, 0); for (int64_t c_col = 0; c_col < channels_col; ++c_col) { @@ -460,7 +386,7 @@ void Im2col::operator()( // Loop over spatial axes in forward order to compute the indices in the // image and column, and whether the index lies in the padding. 
int64_t index_col = c_col; - int64_t index_im = c_col / kernel_shape_size; + int64_t index_im = c_col / kernel_size; bool is_padding = false; for (ptrdiff_t d_i = 0; d_i < rank; ++d_i) { int64_t d = d_iter[d_i]; @@ -482,9 +408,6 @@ void Im2col::operator()( } } while (NextPosition(rank, output_shape, d_iter.data())); } // for (int c = 0; c < channels_col; ++c) { - - cout << "Return void Im2col -> T* data_col={"; for (auto i=0; i < output_shape_size; ++i) cout << - data_col[i] << ", "; cout << "}\n"; } template struct Im2col; @@ -857,55 +780,24 @@ void Col2im(const float* data_col, int64 } template <> -void Col2imNd(const float* data_col, - const int64_t* img_shape, - const int64_t* output_shape, - int64_t channels_col, - int64_t img_size, - const int64_t* kernel_shape, - const int64_t* stride, - const int64_t* dilation, - const int64_t* pad, - ptrdiff_t N, - float* data_img, - CPUMathUtil* context) { - cout << "\n\nCalled void Col2imNd("; - cout << ",\n\tconst float* data_col={"; for (auto i=0; i < img_size; ++i) cout << - data_col[i] << ", "; cout << "}"; - cout << ",\n\tconst int64_t* img_shape={"; for (auto i=0; i < N; ++i) cout << img_shape[i] << - ", "; cout << "}"; - cout << ",\n\tconst int64_t* output_shape={"; for (auto i=0; i < N; ++i) cout << output_shape[i] << - ", "; cout << "}"; - cout << ",\n\tint64_t channels_col=" << channels_col; - cout << ",\n\tint64_t img_size=" << img_size; - cout << ",\n\tconst int64_t* kernel_shape={"; for (auto i=0; i < N; ++i) cout << kernel_shape[i] << - ", "; cout << "}"; - cout << ",\n\tconst int64_t* stride={"; for (auto i=0; i < N; ++i) cout << stride[i] << ", "; - cout << "}"; - cout << ",\n\tconst int64_t* dilation={"; for (auto i=0; i < N; ++i) cout << dilation[i] << ", "; - cout << "}"; - cout << ",\n\tconst int64_t* pad={"; for (auto i=0; i < 2*N; ++i) cout << pad[i] << ", "; cout << "}"; - cout << ",\n\tptrdiff_t N=" << N; - cout << ",\n\tfloat* data_img= preallocated pointer to save at {"; for (auto i=0; i < img_size; ++i) cout << - data_img[i] << ", "; cout << "}"; - cout << ",\n\tCPUMathUtil* context=...)" << std::endl; - +void Col2imNd(const float* data_col, const int64_t* img_shape, + const int64_t* output_shape, int64_t channels_col, int64_t img_size, + const int64_t* kernel_shape, const int64_t* stride, + const int64_t* dilation, const int64_t* pad, ptrdiff_t N, + float* data_img, CPUMathUtil* context) { Set(gsl::narrow(img_size), 0, data_img, context); Im2col()( - data_col, // const T* data_im, - img_shape, // const int64_t* im_shape, - output_shape, // const int64_t* output_shape, - channels_col, // int64_t channels_col, - kernel_shape, // const int64_t* kernel_shape, - stride, // const int64_t* stride, - dilation, // const int64_t* dilation, - pad, // const int64_t* pad, - N, // ptrdiff_t rank, - data_img, // T* data_col, - true); // bool accumulate_output, - - cout << "Return void Col2imNd --> float* data_img= {"; for (auto i=0; i < img_size; ++i) cout << - data_img[i] << ", "; cout << "}"; + data_col, + img_shape, + output_shape, + channels_col, + kernel_shape, + stride, + dilation, + pad, + N, + data_img, + true); } #define SPECIALIZED_COPYVECTOR(T) \ diff --git a/onnxruntime/python/tools/pytorch_export_contrib_ops.py b/onnxruntime/python/tools/pytorch_export_contrib_ops.py index 6d11f6ebeb6ae..82a5c558a2f59 100644 --- a/onnxruntime/python/tools/pytorch_export_contrib_ops.py +++ b/onnxruntime/python/tools/pytorch_export_contrib_ops.py @@ -94,6 +94,8 @@ def tril(g, self, diagonal): def col2im(g, self: torch._C.Value, 
image_shape, block_shape): return g.op("com.microsoft::Col2Im", self, image_shape, block_shape) + _reg(col2im) + def unregister(): """Unregister ONNX Runtime's built-in contrib ops.""" diff --git a/onnxruntime/test/contrib_ops/col2im_test.cc b/onnxruntime/test/contrib_ops/col2im_test.cc index 0e542aff9ac68..e4ed7908418a4 100644 --- a/onnxruntime/test/contrib_ops/col2im_test.cc +++ b/onnxruntime/test/contrib_ops/col2im_test.cc @@ -17,25 +17,25 @@ std::vector _transpose_serialized_vector(std::vector &input, size_t N, siz } std::vector trans_vec(input); - std::cout << "input: ("; - for (size_t i = 0; i < input_size; ++i) - std::cout << trans_vec[i] << ", "; - std::cout << ")" << std::endl; - for (size_t n = 0; n < N; ++n) for (size_t c = 0; c < C; ++c) for (size_t h = 0; h < H; ++h) for (size_t w = 0; w < W; ++w) - trans_vec[n * (C * H * W) + c * (H * W) + (h + H * w)] = input[n * (C * H * W) + c * (H * W) + (w + W * h)]; - - std::cout << "trans_vec: ("; - for (size_t i = 0; i < input_size; ++i) - std::cout << trans_vec[i] << ", "; - std::cout << ")" << std::endl; + trans_vec[n * (C * H * W) + c * (H * W) + (h + H * w)] = \ + input[n * (C * H * W) + c * (H * W) + (w + W * h)]; return trans_vec; } +struct float_iota { + explicit float_iota(float inc, float init_value = 0.0) : _value(init_value), _inc(inc) {} + + operator float() const { return _value; } + float_iota& operator++() { _value += _inc; return *this; } + float _value; + float _inc; +}; + TEST(Col2ImContribOpTest, simple4dNCHW) { OpTester test("Col2Im", 1, kMSDomain); @@ -45,7 +45,8 @@ TEST(Col2ImContribOpTest, simple4dNCHW) { std::vector input(25); std::vector output(25); - std::iota(output.begin(), output.end(), 1); + std::iota(output.begin(), output.end(), float_iota(1., 1.)); + input = _transpose_serialized_vector(output, 1, 1, 5, 5); test.AddInput("input", {1, 5, 5}, input); test.AddInput("image_shape", {2}, std::vector{5, 5}); @@ -64,7 +65,7 @@ TEST(Col2ImContribOpTest, with2Images3channelsNonSquare4dNCHW) { std::vector input(120); std::vector output(120); - std::iota(output.begin(), output.end(), 1); + std::iota(output.begin(), output.end(), float_iota(1., 1.)); input = _transpose_serialized_vector(output, 2, 3, 4, 5); test.AddInput("input", {2, 15, 4}, input); test.AddInput("image_shape", {2}, std::vector{4, 5}); @@ -122,7 +123,7 @@ TEST(Col2ImContribOpTest, with3channels4dNCHW) { std::vector input(75); std::vector output(75); - std::iota(output.begin(), output.end(), 1); + std::iota(output.begin(), output.end(), float_iota(1., 1.)); input = _transpose_serialized_vector(output, 1, 3, 5, 5); test.AddInput("input", {1, 15, 5}, input); test.AddInput("image_shape", {2}, std::vector{5, 5}); @@ -141,7 +142,7 @@ TEST(Col2ImContribOpTest, with2Images3channels4dNCHW) { std::vector input(150); std::vector output(150); - std::iota(output.begin(), output.end(), 1); + std::iota(output.begin(), output.end(), float_iota(1., 1.)); input = _transpose_serialized_vector(output, 2, 3, 5, 5); test.AddInput("input", {2, 15, 5}, input); test.AddInput("image_shape", {2}, std::vector{5, 5}); @@ -160,7 +161,7 @@ TEST(Col2ImContribOpTest, simple5dNCHWD) { std::vector input(25); std::vector output(25); - std::iota(output.begin(), output.end(), 1); + std::iota(output.begin(), output.end(), float_iota(1., 1.)); input = _transpose_serialized_vector(output, 1, 1, 5, 5); test.AddInput("input", {1, 5, 5}, input); test.AddInput("image_shape", {3}, std::vector{1, 5, 5}); @@ -172,11 +173,9 @@ TEST(Col2ImContribOpTest, simple5dNCHWD) { TEST(Im2ColContribOpTest, 
simple) { std::vector input(24); std::vector expected_output(24); - std::iota(input.begin(), input.end(), 1); + std::iota(input.begin(), input.end(), float_iota(1., 1.)); expected_output = {1, 5, 9, 2, 6, 10, 3, 7, 11, 4, 8, 12, 13, 17, 21, 14, 18, 22, 15, 19, 23, 16, 20, 24}; - float* actual_output = new float(24); - std::cout << "\nExpected output --> "; for (auto i=0; i < 24; ++i) std::cout << expected_output[i] << ", "; - std::cout << ")" << std::endl; + float* actual_output = new float[24]; math::Im2col()( input.data(), int64_t(2), @@ -195,9 +194,7 @@ TEST(Im2ColContribOpTest, simple) { actual_output, 0.); - std::cout << "\nActual output --> "; for (auto i=0; i < 24; ++i) std::cout << actual_output[i] << - ", "; std::cout << ")" << std::endl; - delete[] actual_output; + delete [] actual_output; } } // namespace test From 8b033a744fb2c25936f26507daa08d08b9c692a9 Mon Sep 17 00:00:00 2001 From: Thiago Crepaldi Date: Tue, 16 Aug 2022 13:13:37 -0400 Subject: [PATCH 13/30] Update documentation --- docs/ContribOperators.md | 54 ++++++++++++++++++++++++++++++++++++++++ docs/OperatorKernels.md | 1 + 2 files changed, 55 insertions(+) diff --git a/docs/ContribOperators.md b/docs/ContribOperators.md index e35bc530338d6..6f49c85fcf24e 100644 --- a/docs/ContribOperators.md +++ b/docs/ContribOperators.md @@ -13,6 +13,7 @@ Do not modify directly.* * com.microsoft.BitmaskBiasDropout * com.microsoft.BitmaskDropout * com.microsoft.CDist + * com.microsoft.Col2Im * com.microsoft.ComplexMul * com.microsoft.ComplexMulConj * com.microsoft.ConvTransposeWithDynamicPads @@ -753,6 +754,59 @@ This version of the operator has been available since version 1 of the 'com.micr +### **com.microsoft.Col2Im** + + The operator rearranges column blocks back into a multidimensional image + + Col2Im behaves similarly to PyTorch's fold https://pytorch.org/docs/stable/generated/torch.nn.Fold.html, + but it only supports *batched* multi-dimensional image tensors. + + NOTE: Although specifying image_shape looks redundant because it could be calculated from + convolution formulas, it is required as input for more advanced scenarios as explained + at PyTorch's implementation (https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/native/Col2Im.cpp#L10) + + +#### Version + +This version of the operator has been available since version 1 of the 'com.microsoft' operator set. + +#### Attributes + +
+<dl>
+<dt><tt>dilations</tt> : list of ints</dt>
+<dd>1-dimensional tensor with dilation value along each spatial axis of the image. If not present, the dilation defaults to 1 along each spatial axis of the image.</dd>
+<dt><tt>pads</tt> : list of ints</dt>
+<dd>1-dimensional tensor with padding value for the beginning and ending along each spatial axis, it can take any value greater than or equal to 0. The value represent the number of pixels added to the beginning and end part of the corresponding axis. `pads` format should be as follow [x1_begin, x2_begin...x1_end, x2_end,...], where xi_begin is the number of pixels added at the beginning of axis `i` and xi_end the same for the end of axis `i`. If not present, the padding defaults to 0 along start and end of each spatial axis.</dd>
+<dt><tt>strides</tt> : list of ints</dt>
+<dd>1-dimensional tensor with stride value along each spatial axis. If not present, the stride defaults to 1 along each spatial axis.</dd>
+</dl>
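As an aside (not part of the patch), the number of column blocks L expected in the input can be derived from these attributes together with image_shape and block_shape. The sketch below is illustrative only and follows the torch.nn.Fold convention cited in the operator description; the helper name `num_blocks` is invented for this example.

```python
def num_blocks(image_shape, block_shape, dilations=None, pads=None, strides=None):
    """Number of sliding block positions L, assuming the torch.nn.Fold formula."""
    n = len(image_shape)
    dilations = dilations or [1] * n
    strides = strides or [1] * n
    pads = pads or [0] * (2 * n)  # [x1_begin, x2_begin, ..., x1_end, x2_end, ...]
    blocks = 1
    for i in range(n):
        window = dilations[i] * (block_shape[i] - 1) + 1  # dilated block extent
        blocks *= (image_shape[i] + pads[i] + pads[n + i] - window) // strides[i] + 1
    return blocks

# A 5x5 image folded from 1x5 blocks with default attributes has 5 block positions,
# so the operator input has shape [N, C * 5, 5].
assert num_blocks([5, 5], [1, 5]) == 5
```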
+
+#### Inputs
+
+<dl>
+<dt><tt>input</tt> : T</dt>
+<dd>Input data tensor to be rearranged from column blocks back into an image. This is a 3-dimensional tensor containing [N, C * n-ary-product(block_shape), L], where N is batch dimension, C is image channel dimension and L is number of blocks.</dd>
+<dt><tt>image_shape</tt> : tensor(int64)</dt>
+<dd>The shape of the spatial dimensions of the image after rearranging the column blocks.This is a 1-dim tensor with size of at least 2, containing the value [H_img, W_img] for a 2-D image or [dim_i1, dim_i2, ..., dim_iN] for a N-D image.</dd>
+<dt><tt>block_shape</tt> : tensor(int64)</dt>
+<dd>The shape of the block to apply on the input.This is a 1-dim tensor of size of at least 2, containing the value [H_block, W_block] for a 2-D image or [dim_b1, dim_b2, ..., dim_bN] for a N-D block.Dilations, pads and strides are applied to block_shape under the hood.The kernel window start at the top-left of the block and slides to the right and down,similarly to how Convolution kernels do.</dd>
+</dl>
+
+#### Outputs
+
+<dl>
+<dt><tt>output</tt> : T</dt>
+<dd>Output tensor produced by rearranging blocks into an image.</dd>
+</dl>
+
+#### Type Constraints
+
+<dl>
+<dt><tt>T</tt> : tensor(uint8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(int8), tensor(int16), tensor(int32), tensor(int64), tensor(bfloat16), tensor(float16), tensor(float), tensor(double), tensor(string), tensor(bool), tensor(complex64), tensor(complex128)</dt>
+<dd>Constrain input and output types to all numeric tensor types.</dd>
+</dl>
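For reference, the sketch below shows one way to exercise the operator documented above from Python, mirroring the simple4dNCHW unit test added earlier in this series. It is illustrative only (not part of the patch) and assumes an onnxruntime build that registers the com.microsoft Col2Im CPU kernel; the graph name and tensor names are invented for this example.

```python
import numpy as np
from onnx import TensorProto, helper
import onnxruntime as ort

# Single Col2Im node in the com.microsoft domain: fold five 1x5 blocks back into a 5x5 image.
node = helper.make_node(
    "Col2Im", ["input", "image_shape", "block_shape"], ["output"],
    domain="com.microsoft", dilations=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1])
graph = helper.make_graph(
    [node], "col2im_example",
    [helper.make_tensor_value_info("input", TensorProto.FLOAT, [1, 5, 5]),
     helper.make_tensor_value_info("image_shape", TensorProto.INT64, [2]),
     helper.make_tensor_value_info("block_shape", TensorProto.INT64, [2])],
    [helper.make_tensor_value_info("output", TensorProto.FLOAT, [1, 1, 5, 5])])
model = helper.make_model(
    graph, opset_imports=[helper.make_opsetid("", 13), helper.make_opsetid("com.microsoft", 1)])

# The column buffer for 1x5 blocks is the transpose of the target image,
# matching the TransposeSerializedVector helper used by the C++ tests.
image = np.arange(1, 26, dtype=np.float32).reshape(5, 5)
col = image.T.reshape(1, 5, 5)

sess = ort.InferenceSession(model.SerializeToString(), providers=["CPUExecutionProvider"])
(out,) = sess.run(None, {"input": col,
                         "image_shape": np.array([5, 5], dtype=np.int64),
                         "block_shape": np.array([1, 5], dtype=np.int64)})
assert np.array_equal(out, image.reshape(1, 1, 5, 5))
```

When blocks overlap (stride smaller than the dilated block extent), overlapping positions are summed into the output, which is why the non-overlapping 1x5 blocks above reproduce the image exactly.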
+ + ### **com.microsoft.ComplexMul** #### Version diff --git a/docs/OperatorKernels.md b/docs/OperatorKernels.md index 0b5ae058a3474..2f84ec528646a 100644 --- a/docs/OperatorKernels.md +++ b/docs/OperatorKernels.md @@ -399,6 +399,7 @@ Do not modify directly.* |BiasGelu|*in* A:**T**
 *in* B:**T**<br> *out* C:**T**|1+|**T** = tensor(float)|
 |BifurcationDetector|*in* src_tokens:**T**<br> *in* cur_tokens:**T**<br> *in* prev_suffix_match_idx:**T**<br> *in* pred_tokens:**T**<br> *out* tokens:**T**<br> *out* suffix_match_idx:**T**|1+|**T** = tensor(int64)|
 |CDist|*in* A:**T**<br> *in* B:**T**<br> *out* C:**T**|1+|**T** = tensor(double), tensor(float)|
+|Col2Im|*in* input:**T**<br> *in* image_shape:**tensor(int64)**<br> *in* block_shape:**tensor(int64)**<br> *out* output:**T**|1+|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)|
 |ConvTransposeWithDynamicPads|*in* X:**T**<br> *in* W:**T**<br> *in* Pads:**tensor(int64)**<br> *in* B:**T**<br> *out* Y:**T**|1+|**T** = tensor(float)|
 |CropAndResize|*in* X:**T1**<br> *in* rois:**T1**<br> *in* batch_indices:**T2**<br> *in* crop_size:**T2**<br> *out* Y:**T1**|1+|**T1** = tensor(float)<br> **T2** = tensor(int32)|
 |DequantizeLinear|*in* x:**T1**<br> *in* x_scale:**T2**<br> *in* x_zero_point:**T1**<br> *out* y:**T2**|1+|**T1** = tensor(int8), tensor(uint8)<br>
**T2** = tensor(float)| From 5c2d137968dade45180d5d16d382537b584d74c4 Mon Sep 17 00:00:00 2001 From: Thiago Crepaldi Date: Mon, 22 Aug 2022 10:42:50 -0400 Subject: [PATCH 14/30] Address comments --- onnxruntime/test/contrib_ops/col2im_test.cc | 33 ++++----------------- 1 file changed, 6 insertions(+), 27 deletions(-) diff --git a/onnxruntime/test/contrib_ops/col2im_test.cc b/onnxruntime/test/contrib_ops/col2im_test.cc index e4ed7908418a4..2e750f2e9f276 100644 --- a/onnxruntime/test/contrib_ops/col2im_test.cc +++ b/onnxruntime/test/contrib_ops/col2im_test.cc @@ -7,8 +7,11 @@ #include "core/util/math.h" namespace onnxruntime { +namespace contrib { namespace test { +using namespace onnxruntime::test; +namespace { template std::vector _transpose_serialized_vector(std::vector &input, size_t N, size_t C, size_t H, size_t W) { size_t input_size = input.size(); @@ -36,6 +39,8 @@ struct float_iota { float _inc; }; +} // namespace + TEST(Col2ImContribOpTest, simple4dNCHW) { OpTester test("Col2Im", 1, kMSDomain); @@ -170,32 +175,6 @@ TEST(Col2ImContribOpTest, simple5dNCHWD) { test.Run(); } -TEST(Im2ColContribOpTest, simple) { - std::vector input(24); - std::vector expected_output(24); - std::iota(input.begin(), input.end(), float_iota(1., 1.)); - expected_output = {1, 5, 9, 2, 6, 10, 3, 7, 11, 4, 8, 12, 13, 17, 21, 14, 18, 22, 15, 19, 23, 16, 20, 24}; - float* actual_output = new float[24]; - math::Im2col()( - input.data(), - int64_t(2), - int64_t(3), - int64_t(4), - int64_t(1), - int64_t(4), - int64_t(1), - int64_t(1), - int64_t(0), - int64_t(0), - int64_t(0), - int64_t(0), - int64_t(1), - int64_t(1), - actual_output, - 0.); - - delete [] actual_output; -} - } // namespace test +} // namespace contrib } // namespace onnxruntime From bda84f2c3d349c6571a080bc165c8f93448e875f Mon Sep 17 00:00:00 2001 From: Thiago Crepaldi Date: Mon, 29 Aug 2022 17:55:17 -0400 Subject: [PATCH 15/30] Address comments --- .../core/providers/cpu/tensor/col2im.cc | 27 ++++++-------- .../core/providers/cpu/tensor/col2im.h | 6 ---- .../providers/cpu/tensor/col2im_attributes.h | 9 +++-- onnxruntime/test/contrib_ops/col2im_test.cc | 35 ++++++++----------- 4 files changed, 29 insertions(+), 48 deletions(-) diff --git a/onnxruntime/core/providers/cpu/tensor/col2im.cc b/onnxruntime/core/providers/cpu/tensor/col2im.cc index 3a75910f9ff6c..0625999303e6d 100644 --- a/onnxruntime/core/providers/cpu/tensor/col2im.cc +++ b/onnxruntime/core/providers/cpu/tensor/col2im.cc @@ -2,25 +2,20 @@ // Licensed under the MIT License. 
#include "core/providers/cpu/tensor/col2im.h" +#include "core/util/math_cpuonly.h" -#include "core/framework/element_type_lists.h" -#include "core/framework/TensorSeq.h" -#include "core/providers/common.h" -#include "core/framework/copy.h" -#include "core/common/safeint.h" -#include "core/providers/op_kernel_type_control.h" namespace onnxruntime { -#define REGISTER_KERNEL_TYPED(T) \ +#define REGISTER_COL2IM_TYPED_KERNEL(OP_TYPE, VERSION, TYPE, KERNEL_CLASS) \ ONNX_CPU_OPERATOR_TYPED_KERNEL( \ - Col2Im, \ - 1, \ - T, \ - KernelDefBuilder().TypeConstraint("T", DataTypeImpl::AllTensorTypes()), \ - Col2Im); + OP_TYPE, \ + VERSION, \ + TYPE, \ + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), \ + KERNEL_CLASS); -REGISTER_KERNEL_TYPED(float) +REGISTER_COL2IM_TYPED_KERNEL(Col2Im, 1, float, Col2Im); template Status Col2Im::Compute(OpKernelContext* context) const { @@ -49,13 +44,13 @@ Status Col2Im::Compute(OpKernelContext* context) const { batched_image_shape_dims.push_back(image_shape->Data()[i]); adjusted_image_shape_dims.push_back(image_shape->Data()[i]-adjusted_kernel_shape[i]+1); } - TensorShape batched_image_shape(batched_image_shape_dims), adjusted_image_shape(adjusted_image_shape_dims); + TensorShape batched_image_shape(batched_image_shape_dims); T* image_data = context->Output(0, batched_image_shape)->template MutableData(); const T* col_data = col_tensor->template Data(); for (auto image_id = 0; image_id < N; ++image_id) { if (image_shape->Shape()[0] == 2) { - math::Col2im( + math::Col2im( col_data + image_id * col_data_stride, C, image_shape->Data()[0], @@ -76,7 +71,7 @@ Status Col2Im::Compute(OpKernelContext* context) const { math::Col2imNd( col_data + image_id * col_data_stride, image_shape->Data(), - adjusted_image_shape.GetDims().data(), + adjusted_image_shape_dims.data(), kernel_shape_size * C, image_shape_size, adjusted_kernel_shape.GetDims().data(), diff --git a/onnxruntime/core/providers/cpu/tensor/col2im.h b/onnxruntime/core/providers/cpu/tensor/col2im.h index 8cbefd2ec668b..b5849ecc9426b 100644 --- a/onnxruntime/core/providers/cpu/tensor/col2im.h +++ b/onnxruntime/core/providers/cpu/tensor/col2im.h @@ -5,12 +5,6 @@ #include "core/providers/cpu/tensor/col2im_attributes.h" -#include "core/common/common.h" -#include "core/framework/op_kernel.h" -#include "core/util/math_cpuonly.h" -#include "core/framework/tensor.h" -#include "core/providers/cpu/tensor/concatbase.h" - namespace onnxruntime { template diff --git a/onnxruntime/core/providers/cpu/tensor/col2im_attributes.h b/onnxruntime/core/providers/cpu/tensor/col2im_attributes.h index eaef183334ba6..3b94ed213e8bb 100644 --- a/onnxruntime/core/providers/cpu/tensor/col2im_attributes.h +++ b/onnxruntime/core/providers/cpu/tensor/col2im_attributes.h @@ -17,11 +17,9 @@ #pragma once -#ifndef SHARED_PROVIDER #include "core/common/common.h" #include "core/providers/common.h" #include "core/util/math.h" -#endif #include "core/common/inlined_containers.h" #include "core/framework/op_kernel.h" @@ -33,11 +31,12 @@ struct Col2ImAttributes { using Col2ImPadVector = InlinedVector; explicit Col2ImAttributes(const OpKernelInfo& info) { - auto status = info.GetAttrs("strides", strides); + // Make sure empty strides, pads or dilations are defaulted to 1 if necessary + ORT_ENFORCE(info.GetAttrs("strides", strides).IsOK()); gsl::span pads_span; - status = info.GetAttrsAsSpan("pads", pads_span); + ORT_ENFORCE(info.GetAttrsAsSpan("pads", pads_span).IsOK()); pads.assign(pads_span.cbegin(), pads_span.cend()); - status = 
info.GetAttrs("dilations", dilations); + ORT_ENFORCE(info.GetAttrs("dilations", dilations).IsOK()); } ~Col2ImAttributes() = default; diff --git a/onnxruntime/test/contrib_ops/col2im_test.cc b/onnxruntime/test/contrib_ops/col2im_test.cc index 2e750f2e9f276..0801f3792509e 100644 --- a/onnxruntime/test/contrib_ops/col2im_test.cc +++ b/onnxruntime/test/contrib_ops/col2im_test.cc @@ -4,16 +4,18 @@ #include #include "gtest/gtest.h" #include "test/providers/provider_test_utils.h" + +using namespace onnxruntime::test; #include "core/util/math.h" namespace onnxruntime { namespace contrib { namespace test { -using namespace onnxruntime::test; + namespace { template -std::vector _transpose_serialized_vector(std::vector &input, size_t N, size_t C, size_t H, size_t W) { +std::vector TransposeSerializedVector(std::vector &input, size_t N, size_t C, size_t H, size_t W) { size_t input_size = input.size(); if (input_size == 0) { throw std::runtime_error("Invalid input"); @@ -30,15 +32,6 @@ std::vector _transpose_serialized_vector(std::vector &input, size_t N, siz return trans_vec; } -struct float_iota { - explicit float_iota(float inc, float init_value = 0.0) : _value(init_value), _inc(inc) {} - - operator float() const { return _value; } - float_iota& operator++() { _value += _inc; return *this; } - float _value; - float _inc; -}; - } // namespace TEST(Col2ImContribOpTest, simple4dNCHW) { @@ -50,9 +43,9 @@ TEST(Col2ImContribOpTest, simple4dNCHW) { std::vector input(25); std::vector output(25); - std::iota(output.begin(), output.end(), float_iota(1., 1.)); + std::iota(output.begin(), output.end(), 1.0f); - input = _transpose_serialized_vector(output, 1, 1, 5, 5); + input = TransposeSerializedVector(output, 1, 1, 5, 5); test.AddInput("input", {1, 5, 5}, input); test.AddInput("image_shape", {2}, std::vector{5, 5}); test.AddInput("block_shape", {2}, std::vector{1, 5}); @@ -70,8 +63,8 @@ TEST(Col2ImContribOpTest, with2Images3channelsNonSquare4dNCHW) { std::vector input(120); std::vector output(120); - std::iota(output.begin(), output.end(), float_iota(1., 1.)); - input = _transpose_serialized_vector(output, 2, 3, 4, 5); + std::iota(output.begin(), output.end(), 1.0f); + input = TransposeSerializedVector(output, 2, 3, 4, 5); test.AddInput("input", {2, 15, 4}, input); test.AddInput("image_shape", {2}, std::vector{4, 5}); test.AddInput("block_shape", {2}, std::vector{1, 5}); @@ -128,8 +121,8 @@ TEST(Col2ImContribOpTest, with3channels4dNCHW) { std::vector input(75); std::vector output(75); - std::iota(output.begin(), output.end(), float_iota(1., 1.)); - input = _transpose_serialized_vector(output, 1, 3, 5, 5); + std::iota(output.begin(), output.end(), 1.0f); + input = TransposeSerializedVector(output, 1, 3, 5, 5); test.AddInput("input", {1, 15, 5}, input); test.AddInput("image_shape", {2}, std::vector{5, 5}); test.AddInput("block_shape", {2}, std::vector{1, 5}); @@ -147,8 +140,8 @@ TEST(Col2ImContribOpTest, with2Images3channels4dNCHW) { std::vector input(150); std::vector output(150); - std::iota(output.begin(), output.end(), float_iota(1., 1.)); - input = _transpose_serialized_vector(output, 2, 3, 5, 5); + std::iota(output.begin(), output.end(), 1.0f); + input = TransposeSerializedVector(output, 2, 3, 5, 5); test.AddInput("input", {2, 15, 5}, input); test.AddInput("image_shape", {2}, std::vector{5, 5}); test.AddInput("block_shape", {2}, std::vector{1, 5}); @@ -166,8 +159,8 @@ TEST(Col2ImContribOpTest, simple5dNCHWD) { std::vector input(25); std::vector output(25); - std::iota(output.begin(), output.end(), 
float_iota(1., 1.)); - input = _transpose_serialized_vector(output, 1, 1, 5, 5); + std::iota(output.begin(), output.end(), 1.0f); + input = TransposeSerializedVector(output, 1, 1, 5, 5); test.AddInput("input", {1, 5, 5}, input); test.AddInput("image_shape", {3}, std::vector{1, 5, 5}); test.AddInput("block_shape", {3}, std::vector{1, 1, 5}); From 1d36599e78fe063e284f6cbe3fb3739efe2c1baa Mon Sep 17 00:00:00 2001 From: Thiago Crepaldi Date: Thu, 1 Sep 2022 12:46:46 -0400 Subject: [PATCH 16/30] Address comments --- onnxruntime/contrib_ops/cpu/col2im.cc | 2 +- onnxruntime/core/providers/cpu/tensor/col2im_attributes.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/onnxruntime/contrib_ops/cpu/col2im.cc b/onnxruntime/contrib_ops/cpu/col2im.cc index 50689ccb6b4ab..2f66b3a5ace4b 100644 --- a/onnxruntime/contrib_ops/cpu/col2im.cc +++ b/onnxruntime/contrib_ops/cpu/col2im.cc @@ -13,7 +13,7 @@ namespace contrib { 1, \ T, \ kCpuExecutionProvider, \ - KernelDefBuilder().TypeConstraint("T", DataTypeImpl::AllTensorTypes()), \ + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), \ Col2Im); REGISTER_KERNEL_TYPED(float) diff --git a/onnxruntime/core/providers/cpu/tensor/col2im_attributes.h b/onnxruntime/core/providers/cpu/tensor/col2im_attributes.h index 3b94ed213e8bb..2e9db6ca0bc15 100644 --- a/onnxruntime/core/providers/cpu/tensor/col2im_attributes.h +++ b/onnxruntime/core/providers/cpu/tensor/col2im_attributes.h @@ -32,11 +32,11 @@ struct Col2ImAttributes { explicit Col2ImAttributes(const OpKernelInfo& info) { // Make sure empty strides, pads or dilations are defaulted to 1 if necessary - ORT_ENFORCE(info.GetAttrs("strides", strides).IsOK()); + ORT_THROW_IF_ERROR(info.GetAttrs("strides", strides).IsOK()); gsl::span pads_span; - ORT_ENFORCE(info.GetAttrsAsSpan("pads", pads_span).IsOK()); + ORT_THROW_IF_ERROR(info.GetAttrsAsSpan("pads", pads_span).IsOK()); pads.assign(pads_span.cbegin(), pads_span.cend()); - ORT_ENFORCE(info.GetAttrs("dilations", dilations).IsOK()); + ORT_THROW_IF_ERROR(info.GetAttrs("dilations", dilations).IsOK()); } ~Col2ImAttributes() = default; From 9a8c6568d11acee81ddb686f3c10581ea79ee138 Mon Sep 17 00:00:00 2001 From: Thiago Crepaldi Date: Tue, 6 Sep 2022 13:18:38 -0400 Subject: [PATCH 17/30] Address comments --- onnxruntime/contrib_ops/cpu/col2im.cc | 14 +- .../core/graph/contrib_ops/contrib_defs.cc | 153 +++++++++--------- .../core/providers/cpu/tensor/col2im.cc | 71 ++++---- .../providers/cpu/tensor/col2im_attributes.h | 6 +- .../tools/pytorch_export_contrib_ops.py | 38 ++--- onnxruntime/test/contrib_ops/col2im_test.cc | 115 +++++++------ 6 files changed, 198 insertions(+), 199 deletions(-) diff --git a/onnxruntime/contrib_ops/cpu/col2im.cc b/onnxruntime/contrib_ops/cpu/col2im.cc index 2f66b3a5ace4b..d6ed5495e49aa 100644 --- a/onnxruntime/contrib_ops/cpu/col2im.cc +++ b/onnxruntime/contrib_ops/cpu/col2im.cc @@ -6,13 +6,13 @@ namespace onnxruntime { namespace contrib { -#define REGISTER_KERNEL_TYPED(T) \ - ONNX_OPERATOR_TYPED_KERNEL_EX( \ - Col2Im, \ - kMSDomain, \ - 1, \ - T, \ - kCpuExecutionProvider, \ +#define REGISTER_KERNEL_TYPED(T) \ + ONNX_OPERATOR_TYPED_KERNEL_EX( \ + Col2Im, \ + kMSDomain, \ + 1, \ + T, \ + kCpuExecutionProvider, \ KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), \ Col2Im); diff --git a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc index 715eb9cda97b5..0e3e27643ff8b 100644 --- a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc +++ 
b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc @@ -762,7 +762,8 @@ ONNX_MS_OPERATOR_SET_SCHEMA(BiasSoftmax, 1, "Y = softmax(scores + bias)) with simple broadcast on bias. " "Intended to specialize softmax(scores + additive_mask) commonly found in transformer models.") .Attr("axis", "apply softmax to elements for dimensions axis or higher", AttributeProto::INT, static_cast(1)) - .Attr("is_inner_broadcast", "true if broadcast bias across input for dimensions broadcast_axis to axis-1, " + .Attr("is_inner_broadcast", + "true if broadcast bias across input for dimensions broadcast_axis to axis-1, " "otherwise broadcast bias across input for dimensions 0 to broadcast_axis - 1", AttributeProto::INT) .Input(0, "data", "The input data as Tensor.", "T") @@ -1011,86 +1012,86 @@ but it only supports *batched* multi-dimensional image tensors. NOTE: Although specifying image_shape looks redundant because it could be calculated from convolution formulas, it is required as input for more advanced scenarios as explained - at PyTorch's implementation (https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/native/Col2Im.cpp#L10) + at PyTorch's implementation (https://github.com/pytorch/pytorch/blob/faac3dbce20a6068a3e530c11788896e81a73c64/aten/src/ATen/native/Col2Im.cpp#L10) )DOC"; ONNX_MS_OPERATOR_SET_SCHEMA(Col2Im, 1, OpSchema() - .SetDoc(Col2Im_ver1_doc) - .Attr( - "dilations", - "1-dimensional tensor with dilation value along each spatial axis of the image. " - "If not present, the dilation defaults to 1 along each spatial axis of the image.", - AttributeProto::INTS, - OPTIONAL_VALUE) - .Attr( - "pads", - "1-dimensional tensor with padding value for the beginning and ending along each" - " spatial axis, it can take any value greater than or equal to 0. " - "The value represent the number of pixels added to the beginning " - "and end part of the corresponding axis. `pads` format should be as follow " - "[x1_begin, x2_begin...x1_end, x2_end,...], where xi_begin is the number of pixels " - "added at the beginning of axis `i` and xi_end the same for the end of axis `i`. " - "If not present, the padding defaults to 0 along start and end of each spatial axis.", - AttributeProto::INTS, - OPTIONAL_VALUE) - .Attr( - "strides", - "1-dimensional tensor with stride value along each spatial axis. " - "If not present, the stride defaults to 1 along each spatial axis.", - AttributeProto::INTS, - OPTIONAL_VALUE) - .Input( - 0, - "input", - "Input data tensor to be rearranged from column blocks back into an image." - " This is a 3-dimensional tensor containing [N, C * n-ary-product(block_shape), L]," - " where N is batch dimension, C is image channel dimension and L is number of blocks.", - "T", - OpSchema::Single, - true, - 1, - OpSchema::Differentiable) - .Input( - 1, - "image_shape", - "The shape of the spatial dimensions of the image after rearranging the column blocks." - "This is a 1-dim tensor with size of at least 2, containing the value [H_img, W_img] " - " for a 2-D image or [dim_i1, dim_i2, ..., dim_iN] for a N-D image.", - "tensor(int64)", - OpSchema::Single, - true, - 1, - OpSchema::NonDifferentiable) - .Input( - 2, - "block_shape", - "The shape of the block to apply on the input." - "This is a 1-dim tensor of size of at least 2, containing the value [H_block, W_block] " - " for a 2-D image or [dim_b1, dim_b2, ..., dim_bN] for a N-D block." - "Dilations, pads and strides are applied to block_shape under the hood." 
- "The kernel window start at the top-left of the block and slides to the right and down," - "similarly to how Convolution kernels do.", - "tensor(int64)", - OpSchema::Single, - true, - 1, - OpSchema::NonDifferentiable) - .Output( - 0, - "output", - "Output tensor produced by rearranging blocks into an image.", - "T", - OpSchema::Single, - true, - 1, - OpSchema::Differentiable) - .TypeConstraint( - "T", - OpSchema::all_tensor_types_with_bfloat(), - "Constrain input and output types to all numeric tensor types.") - .TypeAndShapeInferenceFunction([](InferenceContext& ctx) { col2imShapeInference(ctx); })); + .SetDoc(Col2Im_ver1_doc) + .Attr( + "dilations", + "1-dimensional tensor with dilation value along each spatial axis of the image. " + "If not present, the dilation defaults to 1 along each spatial axis of the image.", + AttributeProto::INTS, + OPTIONAL_VALUE) + .Attr( + "pads", + "1-dimensional tensor with padding value for the beginning and ending along each " + "spatial axis, it can take any value greater than or equal to 0. " + "The value represent the number of pixels added to the beginning " + "and end part of the corresponding axis. `pads` format should be as follow " + "[x1_begin, x2_begin...x1_end, x2_end,...], where xi_begin is the number of pixels " + "added at the beginning of axis `i` and xi_end the same for the end of axis `i`. " + "If not present, the padding defaults to 0 along start and end of each spatial axis.", + AttributeProto::INTS, + OPTIONAL_VALUE) + .Attr( + "strides", + "1-dimensional tensor with stride value along each spatial axis. " + "If not present, the stride defaults to 1 along each spatial axis.", + AttributeProto::INTS, + OPTIONAL_VALUE) + .Input( + 0, + "input", + "Input data tensor to be rearranged from column blocks back into an image. " + "This is a 3-dimensional tensor containing [N, C * n-ary-product(block_shape), L], " + "where N is batch dimension, C is image channel dimension and L is number of blocks.", + "T", + OpSchema::Single, + true, + 1, + OpSchema::Differentiable) + .Input( + 1, + "image_shape", + "The shape of the spatial dimensions of the image after rearranging the column blocks. " + "This is a 1-dim tensor with size of at least 2, containing the value [H_img, W_img] " + "for a 2-D image or [dim_i1, dim_i2, ..., dim_iN] for a N-D image.", + "tensor(int64)", + OpSchema::Single, + true, + 1, + OpSchema::NonDifferentiable) + .Input( + 2, + "block_shape", + "The shape of the block to apply on the input." + "This is a 1-dim tensor of size of at least 2, containing the value [H_block, W_block] " + "for a 2-D image or [dim_b1, dim_b2, ..., dim_bN] for a N-D block. " + "Dilations, pads and strides are applied to block_shape under the hood. " + "The kernel window start at the top-left of the block and slides to the right and down, " + "similarly to how Convolution kernels do.", + "tensor(int64)", + OpSchema::Single, + true, + 1, + OpSchema::NonDifferentiable) + .Output( + 0, + "output", + "Output tensor produced by rearranging blocks into an image.", + "T", + OpSchema::Single, + true, + 1, + OpSchema::Differentiable) + .TypeConstraint( + "T", + OpSchema::all_tensor_types_with_bfloat(), + "Constrain input and output types to all numeric tensor types.") + .TypeAndShapeInferenceFunction([](InferenceContext& ctx) { col2imShapeInference(ctx); })); constexpr const char* GridSample_ver1_doc = R"DOC( Given an `input` and a flow-field `grid`, computes the `output` using `input` values and pixel locations from `grid`. 
diff --git a/onnxruntime/core/providers/cpu/tensor/col2im.cc b/onnxruntime/core/providers/cpu/tensor/col2im.cc index 0625999303e6d..adb1d68b8727e 100644 --- a/onnxruntime/core/providers/cpu/tensor/col2im.cc +++ b/onnxruntime/core/providers/cpu/tensor/col2im.cc @@ -4,11 +4,10 @@ #include "core/providers/cpu/tensor/col2im.h" #include "core/util/math_cpuonly.h" - namespace onnxruntime { -#define REGISTER_COL2IM_TYPED_KERNEL(OP_TYPE, VERSION, TYPE, KERNEL_CLASS) \ - ONNX_CPU_OPERATOR_TYPED_KERNEL( \ +#define REGISTER_COL2IM_TYPED_KERNEL(OP_TYPE, VERSION, TYPE, KERNEL_CLASS) \ + ONNX_CPU_OPERATOR_TYPED_MS_KERNEL( \ OP_TYPE, \ VERSION, \ TYPE, \ @@ -26,9 +25,9 @@ Status Col2Im::Compute(OpKernelContext* context) const { int64_t image_shape_size = 1; int64_t kernel_shape_size = 1; TensorShapeVector adjusted_kernel_shape_dims; - for (auto i=0; i < image_shape->Shape().Size(); ++i) { - image_shape_size *= image_shape->Data()[i]; - kernel_shape_size *= kernel_shape->Data()[i]; + for (auto i = 0; i < image_shape->Shape().Size(); ++i) { + image_shape_size *= image_shape->Data()[i]; + kernel_shape_size *= kernel_shape->Data()[i]; adjusted_kernel_shape_dims.push_back(col2im_attrs_.dilations[i] * (kernel_shape->Data()[i] - 1) + 1); } TensorShape col_shape = col_tensor->Shape(); @@ -40,9 +39,9 @@ Status Col2Im::Compute(OpKernelContext* context) const { TensorShapeVector batched_image_shape_dims, adjusted_image_shape_dims; batched_image_shape_dims.insert(batched_image_shape_dims.begin(), {N, C}); - for (auto i=0; i < image_shape->Shape()[0]; ++i) { + for (auto i = 0; i < image_shape->Shape()[0]; ++i) { batched_image_shape_dims.push_back(image_shape->Data()[i]); - adjusted_image_shape_dims.push_back(image_shape->Data()[i]-adjusted_kernel_shape[i]+1); + adjusted_image_shape_dims.push_back(image_shape->Data()[i] - adjusted_kernel_shape[i] + 1); } TensorShape batched_image_shape(batched_image_shape_dims); T* image_data = context->Output(0, batched_image_shape)->template MutableData(); @@ -51,36 +50,36 @@ Status Col2Im::Compute(OpKernelContext* context) const { for (auto image_id = 0; image_id < N; ++image_id) { if (image_shape->Shape()[0] == 2) { math::Col2im( - col_data + image_id * col_data_stride, - C, - image_shape->Data()[0], - image_shape->Data()[1], - kernel_shape->Data()[0], - kernel_shape->Data()[1], - col2im_attrs_.dilations[0], - col2im_attrs_.dilations[1], - col2im_attrs_.pads[0], - col2im_attrs_.pads[1], - col2im_attrs_.pads[2], - col2im_attrs_.pads[3], - col2im_attrs_.strides[0], - col2im_attrs_.strides[1], - image_data + image_id * col_stride, - &CPUMathUtil::Instance()); + col_data + image_id * col_data_stride, + C, + image_shape->Data()[0], + image_shape->Data()[1], + kernel_shape->Data()[0], + kernel_shape->Data()[1], + col2im_attrs_.dilations[0], + col2im_attrs_.dilations[1], + col2im_attrs_.pads[0], + col2im_attrs_.pads[1], + col2im_attrs_.pads[2], + col2im_attrs_.pads[3], + col2im_attrs_.strides[0], + col2im_attrs_.strides[1], + image_data + image_id * col_stride, + &CPUMathUtil::Instance()); } else { math::Col2imNd( - col_data + image_id * col_data_stride, - image_shape->Data(), - adjusted_image_shape_dims.data(), - kernel_shape_size * C, - image_shape_size, - adjusted_kernel_shape.GetDims().data(), - col2im_attrs_.strides.data(), - col2im_attrs_.dilations.data(), - col2im_attrs_.pads.data(), - image_shape->Shape().Size(), - image_data + image_id * col_stride, - &CPUMathUtil::Instance()); + col_data + image_id * col_data_stride, + image_shape->Data(), + adjusted_image_shape_dims.data(), + 
kernel_shape_size * C, + image_shape_size, + adjusted_kernel_shape.GetDims().data(), + col2im_attrs_.strides.data(), + col2im_attrs_.dilations.data(), + col2im_attrs_.pads.data(), + image_shape->Shape().Size(), + image_data + image_id * col_stride, + &CPUMathUtil::Instance()); } } return Status::OK(); diff --git a/onnxruntime/core/providers/cpu/tensor/col2im_attributes.h b/onnxruntime/core/providers/cpu/tensor/col2im_attributes.h index 2e9db6ca0bc15..49ddbe1cbb300 100644 --- a/onnxruntime/core/providers/cpu/tensor/col2im_attributes.h +++ b/onnxruntime/core/providers/cpu/tensor/col2im_attributes.h @@ -32,11 +32,11 @@ struct Col2ImAttributes { explicit Col2ImAttributes(const OpKernelInfo& info) { // Make sure empty strides, pads or dilations are defaulted to 1 if necessary - ORT_THROW_IF_ERROR(info.GetAttrs("strides", strides).IsOK()); + ORT_THROW_IF_ERROR(info.GetAttrs("strides", strides)); gsl::span pads_span; - ORT_THROW_IF_ERROR(info.GetAttrsAsSpan("pads", pads_span).IsOK()); + ORT_THROW_IF_ERROR(info.GetAttrsAsSpan("pads", pads_span)); pads.assign(pads_span.cbegin(), pads_span.cend()); - ORT_THROW_IF_ERROR(info.GetAttrs("dilations", dilations).IsOK()); + ORT_THROW_IF_ERROR(info.GetAttrs("dilations", dilations)); } ~Col2ImAttributes() = default; diff --git a/onnxruntime/python/tools/pytorch_export_contrib_ops.py b/onnxruntime/python/tools/pytorch_export_contrib_ops.py index 82a5c558a2f59..8271822673421 100644 --- a/onnxruntime/python/tools/pytorch_export_contrib_ops.py +++ b/onnxruntime/python/tools/pytorch_export_contrib_ops.py @@ -1,5 +1,5 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. +#Copyright(c) Microsoft Corporation.All rights reserved. +#Licensed under the MIT License. """ Support for registering ONNX Runtime's built-in contrib ops with @@ -8,7 +8,7 @@ import typing try: - # TODO(justinchuby): Create a function to alert users when torch is not installed +#TODO(justinchuby) : Create a function to alert users when torch is not installed import torch except ModuleNotFoundError: raise ModuleNotFoundError( @@ -35,26 +35,26 @@ def register(): """ def grid_sampler(g, input, grid, mode, padding_mode, align_corners): - # mode - # 'bilinear' : onnx::Constant[value={0}] - # 'nearest' : onnx::Constant[value={1}] - # 'bicubic' : onnx::Constant[value={2}] - # padding_mode - # 'zeros' : onnx::Constant[value={0}] - # 'border' : onnx::Constant[value={1}] - # 'reflection' : onnx::Constant[value={2}] +#mode +#'bilinear' : onnx::Constant[value = {0 }] +#'nearest' : onnx::Constant[value = {1 }] +#'bicubic' : onnx::Constant[value = {2 }] +#padding_mode +#'zeros' : onnx::Constant[value = {0 }] +#'border' : onnx::Constant[value = {1 }] +#'reflection' : onnx::Constant[value = {2 }] mode = sym_help._maybe_get_const(mode, "i") padding_mode = sym_help._maybe_get_const(padding_mode, "i") mode_str = ["bilinear", "nearest", "bicubic"][mode] padding_mode_str = ["zeros", "border", "reflection"][padding_mode] align_corners = int(sym_help._maybe_get_const(align_corners, "b")) - # From opset v13 onward, the output shape can be specified with - # (N, C, H, W) (N, H_out, W_out, 2) => (N, C, H_out, W_out) - # input_shape = input.type().sizes() - # gird_shape = grid.type().sizes() - # output_shape = input_shape[:2] + gird_shape[1:3] - # g.op(...).setType(input.type().with_sizes(output_shape)) +#From opset v13 onward, the output shape can be specified with +#(N, C, H, W)(N, H_out, W_out, 2) =>(N, C, H_out, W_out) +#input_shape = input.type().sizes() +#gird_shape = 
grid.type().sizes() +#output_shape = input_shape[ : 2] + gird_shape[1 : 3] +#g.op(...).setType(input.type().with_sizes(output_shape)) return g.op( "com.microsoft::GridSample", @@ -74,7 +74,7 @@ def inverse(g, self): @torch.onnx.symbolic_helper.parse_args("v", "s") def gelu(g, self: torch._C.Value, approximate: str = "none"): - # Use microsoft::Gelu for performance if possible. It only supports approximate == "none" +#Use microsoft::Gelu for performance if possible.It only supports approximate == "none" if approximate == "none": return g.op("com.microsoft::Gelu", self).setType(self.type()) return torch.onnx.symbolic_opset9.gelu(g, self, approximate) @@ -103,7 +103,7 @@ def unregister(): try: torch.onnx.unregister_custom_op_symbolic(name, _OPSET_VERSION) except AttributeError: - # unregister_custom_op_symbolic is not available before PyTorch 1.12 +#unregister_custom_op_symbolic is not available before PyTorch 1.12 namespace, kind = name.split("::") for version in sym_help._onnx_stable_opsets: if version >= _OPSET_VERSION and sym_registry.is_registered_op(kind, namespace, version): diff --git a/onnxruntime/test/contrib_ops/col2im_test.cc b/onnxruntime/test/contrib_ops/col2im_test.cc index 0801f3792509e..3031975c0df2d 100644 --- a/onnxruntime/test/contrib_ops/col2im_test.cc +++ b/onnxruntime/test/contrib_ops/col2im_test.cc @@ -12,24 +12,23 @@ namespace onnxruntime { namespace contrib { namespace test { - namespace { template -std::vector TransposeSerializedVector(std::vector &input, size_t N, size_t C, size_t H, size_t W) { - size_t input_size = input.size(); - if (input_size == 0) { - throw std::runtime_error("Invalid input"); - } - std::vector trans_vec(input); - - for (size_t n = 0; n < N; ++n) - for (size_t c = 0; c < C; ++c) - for (size_t h = 0; h < H; ++h) - for (size_t w = 0; w < W; ++w) - trans_vec[n * (C * H * W) + c * (H * W) + (h + H * w)] = \ - input[n * (C * H * W) + c * (H * W) + (w + W * h)]; - - return trans_vec; +std::vector TransposeSerializedVector(std::vector& input, size_t N, size_t C, size_t H, size_t W) { + size_t input_size = input.size(); + if (input_size == 0) { + throw std::runtime_error("Invalid input"); + } + std::vector trans_vec(input); + + for (size_t n = 0; n < N; ++n) + for (size_t c = 0; c < C; ++c) + for (size_t h = 0; h < H; ++h) + for (size_t w = 0; w < W; ++w) + trans_vec[n * (C * H * W) + c * (H * W) + (h + H * w)] = + input[n * (C * H * W) + c * (H * W) + (w + W * h)]; + + return trans_vec; } } // namespace @@ -46,9 +45,9 @@ TEST(Col2ImContribOpTest, simple4dNCHW) { std::iota(output.begin(), output.end(), 1.0f); input = TransposeSerializedVector(output, 1, 1, 5, 5); - test.AddInput("input", {1, 5, 5}, input); - test.AddInput("image_shape", {2}, std::vector{5, 5}); - test.AddInput("block_shape", {2}, std::vector{1, 5}); + test.AddInput("input", {1, 5, 5}, input); + test.AddInput("image_shape", {2}, std::vector{5, 5}); + test.AddInput("block_shape", {2}, std::vector{1, 5}); test.AddOutput("output", {1, 1, 5, 5}, output); test.Run(); @@ -65,9 +64,9 @@ TEST(Col2ImContribOpTest, with2Images3channelsNonSquare4dNCHW) { std::vector output(120); std::iota(output.begin(), output.end(), 1.0f); input = TransposeSerializedVector(output, 2, 3, 4, 5); - test.AddInput("input", {2, 15, 4}, input); - test.AddInput("image_shape", {2}, std::vector{4, 5}); - test.AddInput("block_shape", {2}, std::vector{1, 5}); + test.AddInput("input", {2, 15, 4}, input); + test.AddInput("image_shape", {2}, std::vector{4, 5}); + test.AddInput("block_shape", {2}, std::vector{1, 5}); 
test.AddOutput("output", {2, 3, 4, 5}, output); test.Run(); @@ -80,33 +79,33 @@ TEST(Col2ImContribOpTest, with2Images2channelsNonSquareDilationPadStride4dNCHW) test.AddAttribute("dilations", std::vector{2, 2}); test.AddAttribute("pads", std::vector{2, 2, 2, 2}); - std::vector input{ 0., 0., 0., 0., 0., 1., 3., 5., 0., 11., 13., 15., 0., 0., 0., 0., - 0., 0., 0., 0., 1., 3., 5., 0., 11., 13., 15., 0., 0., 0., 0., 0., - 0., 0., 0., 0., 0., 21., 23., 25., 0., 31., 33., 35., 0., 0., 0., 0., - 0., 0., 0., 0., 21., 23., 25., 0., 31., 33., 35., 0., 0., 0., 0., 0., - 0., 0., 0., 0., 0., 41., 43., 45., 0., 51., 53., 55., 0., 0., 0., 0., - 0., 0., 0., 0., 41., 43., 45., 0., 51., 53., 55., 0., 0., 0., 0., 0., - 0., 0., 0., 0., 0., 61., 63., 65., 0., 71., 73., 75., 0., 0., 0., 0., - 0., 0., 0., 0., 61., 63., 65., 0., 71., 73., 75., 0., 0., 0., 0., 0.}; - std::vector output { 2., 0., 6., 0., 10., - 0., 0., 0., 0., 0., - 22., 0., 26., 0., 30., - 0., 0., 0., 0., 0., - 42., 0., 46., 0., 50., - 0., 0., 0., 0., 0., - 62., 0., 66., 0., 70., - 0., 0., 0., 0., 0., - 82., 0., 86., 0., 90., - 0., 0., 0., 0., 0., - 102., 0., 106., 0., 110., - 0., 0., 0., 0., 0., - 122., 0., 126., 0., 130., - 0., 0., 0., 0., 0., - 142., 0., 146., 0., 150., - 0., 0., 0., 0., 0.}; - test.AddInput("input", {2, 4, 16}, input); - test.AddInput("image_shape", {2}, std::vector{4, 5}); - test.AddInput("block_shape", {2}, std::vector{1, 2}); + std::vector input{0., 0., 0., 0., 0., 1., 3., 5., 0., 11., 13., 15., 0., 0., 0., 0., + 0., 0., 0., 0., 1., 3., 5., 0., 11., 13., 15., 0., 0., 0., 0., 0., + 0., 0., 0., 0., 0., 21., 23., 25., 0., 31., 33., 35., 0., 0., 0., 0., + 0., 0., 0., 0., 21., 23., 25., 0., 31., 33., 35., 0., 0., 0., 0., 0., + 0., 0., 0., 0., 0., 41., 43., 45., 0., 51., 53., 55., 0., 0., 0., 0., + 0., 0., 0., 0., 41., 43., 45., 0., 51., 53., 55., 0., 0., 0., 0., 0., + 0., 0., 0., 0., 0., 61., 63., 65., 0., 71., 73., 75., 0., 0., 0., 0., + 0., 0., 0., 0., 61., 63., 65., 0., 71., 73., 75., 0., 0., 0., 0., 0.}; + std::vector output{2., 0., 6., 0., 10., + 0., 0., 0., 0., 0., + 22., 0., 26., 0., 30., + 0., 0., 0., 0., 0., + 42., 0., 46., 0., 50., + 0., 0., 0., 0., 0., + 62., 0., 66., 0., 70., + 0., 0., 0., 0., 0., + 82., 0., 86., 0., 90., + 0., 0., 0., 0., 0., + 102., 0., 106., 0., 110., + 0., 0., 0., 0., 0., + 122., 0., 126., 0., 130., + 0., 0., 0., 0., 0., + 142., 0., 146., 0., 150., + 0., 0., 0., 0., 0.}; + test.AddInput("input", {2, 4, 16}, input); + test.AddInput("image_shape", {2}, std::vector{4, 5}); + test.AddInput("block_shape", {2}, std::vector{1, 2}); test.AddOutput("output", {2, 2, 4, 5}, output); test.Run(); @@ -123,9 +122,9 @@ TEST(Col2ImContribOpTest, with3channels4dNCHW) { std::vector output(75); std::iota(output.begin(), output.end(), 1.0f); input = TransposeSerializedVector(output, 1, 3, 5, 5); - test.AddInput("input", {1, 15, 5}, input); - test.AddInput("image_shape", {2}, std::vector{5, 5}); - test.AddInput("block_shape", {2}, std::vector{1, 5}); + test.AddInput("input", {1, 15, 5}, input); + test.AddInput("image_shape", {2}, std::vector{5, 5}); + test.AddInput("block_shape", {2}, std::vector{1, 5}); test.AddOutput("output", {1, 3, 5, 5}, output); test.Run(); @@ -142,9 +141,9 @@ TEST(Col2ImContribOpTest, with2Images3channels4dNCHW) { std::vector output(150); std::iota(output.begin(), output.end(), 1.0f); input = TransposeSerializedVector(output, 2, 3, 5, 5); - test.AddInput("input", {2, 15, 5}, input); - test.AddInput("image_shape", {2}, std::vector{5, 5}); - test.AddInput("block_shape", {2}, std::vector{1, 5}); + 
test.AddInput("input", {2, 15, 5}, input); + test.AddInput("image_shape", {2}, std::vector{5, 5}); + test.AddInput("block_shape", {2}, std::vector{1, 5}); test.AddOutput("output", {2, 3, 5, 5}, output); test.Run(); @@ -161,9 +160,9 @@ TEST(Col2ImContribOpTest, simple5dNCHWD) { std::vector output(25); std::iota(output.begin(), output.end(), 1.0f); input = TransposeSerializedVector(output, 1, 1, 5, 5); - test.AddInput("input", {1, 5, 5}, input); - test.AddInput("image_shape", {3}, std::vector{1, 5, 5}); - test.AddInput("block_shape", {3}, std::vector{1, 1, 5}); + test.AddInput("input", {1, 5, 5}, input); + test.AddInput("image_shape", {3}, std::vector{1, 5, 5}); + test.AddInput("block_shape", {3}, std::vector{1, 1, 5}); test.AddOutput("output", {1, 1, 1, 5, 5}, output); test.Run(); } From 7d176824ce5824eacc26a7da19ddf514ac75c9c9 Mon Sep 17 00:00:00 2001 From: Liqun Fu Date: Wed, 11 Jan 2023 16:34:06 -0800 Subject: [PATCH 18/30] use onnx spec for Col2Im Signed-off-by: Liqun Fu --- .../core/graph/contrib_ops/contrib_defs.cc | 188 ------------------ onnxruntime/core/graph/contrib_ops/ms_opset.h | 2 - .../providers/cpu/cpu_execution_provider.cc | 2 + .../core/providers/cpu/tensor/col2im.cc | 53 +++-- .../core/providers/cpu/tensor/col2im.h | 1 + .../providers/cpu/tensor/col2im_attributes.h | 16 +- .../tools/pytorch_export_contrib_ops.py | 20 +- onnxruntime/test/contrib_ops/col2im_test.cc | 172 ---------------- .../python/contrib_ops/onnx_test_col2im.py | 55 ----- .../test_col2im/test_data_set_0/input_0.pb | Bin 117 -> 0 bytes .../test_col2im/test_data_set_0/input_1.pb | Bin 35 -> 0 bytes .../test_col2im/test_data_set_0/input_2.pb | Bin 35 -> 0 bytes .../test_col2im/test_data_set_0/output_0.pb | Bin 120 -> 0 bytes .../test_col2im_5d/test_data_set_0/input_0.pb | Bin 498 -> 0 bytes .../test_col2im_5d/test_data_set_0/input_1.pb | Bin 43 -> 0 bytes .../test_col2im_5d/test_data_set_0/input_2.pb | Bin 43 -> 0 bytes .../test_data_set_0/output_0.pb | Bin 503 -> 0 bytes .../test_data_set_0/input_0.pb | Bin 97 -> 0 bytes .../test_data_set_0/input_1.pb | Bin 35 -> 0 bytes .../test_data_set_0/input_2.pb | Bin 35 -> 0 bytes .../test_data_set_0/output_0.pb | Bin 165 -> 0 bytes .../test_data_set_0/input_0.pb | Bin 318 -> 0 bytes .../test_data_set_0/input_1.pb | Bin 35 -> 0 bytes .../test_data_set_0/input_2.pb | Bin 35 -> 0 bytes .../test_data_set_0/output_0.pb | Bin 120 -> 0 bytes .../test_data_set_0/input_0.pb | Bin 162 -> 0 bytes .../test_data_set_0/input_1.pb | Bin 35 -> 0 bytes .../test_data_set_0/input_2.pb | Bin 35 -> 0 bytes .../test_data_set_0/output_0.pb | Bin 120 -> 0 bytes .../test_col2im/test_data_set_0/input_0.pb | Bin 117 -> 0 bytes .../test_col2im/test_data_set_0/input_1.pb | Bin 35 -> 0 bytes .../test_col2im/test_data_set_0/input_2.pb | Bin 35 -> 0 bytes .../test_col2im/test_data_set_0/output_0.pb | Bin 145 -> 0 bytes 33 files changed, 52 insertions(+), 457 deletions(-) delete mode 100644 onnxruntime/test/contrib_ops/col2im_test.cc delete mode 100644 onnxruntime/test/python/contrib_ops/onnx_test_col2im.py delete mode 100644 onnxruntime/test/python/testdata/node/test_col2im/test_data_set_0/input_0.pb delete mode 100644 onnxruntime/test/python/testdata/node/test_col2im/test_data_set_0/input_1.pb delete mode 100644 onnxruntime/test/python/testdata/node/test_col2im/test_data_set_0/input_2.pb delete mode 100644 onnxruntime/test/python/testdata/node/test_col2im/test_data_set_0/output_0.pb delete mode 100644 onnxruntime/test/python/testdata/node/test_col2im_5d/test_data_set_0/input_0.pb delete mode 
100644 onnxruntime/test/python/testdata/node/test_col2im_5d/test_data_set_0/input_1.pb delete mode 100644 onnxruntime/test/python/testdata/node/test_col2im_5d/test_data_set_0/input_2.pb delete mode 100644 onnxruntime/test/python/testdata/node/test_col2im_5d/test_data_set_0/output_0.pb delete mode 100644 onnxruntime/test/python/testdata/node/test_col2im_dilations/test_data_set_0/input_0.pb delete mode 100644 onnxruntime/test/python/testdata/node/test_col2im_dilations/test_data_set_0/input_1.pb delete mode 100644 onnxruntime/test/python/testdata/node/test_col2im_dilations/test_data_set_0/input_2.pb delete mode 100644 onnxruntime/test/python/testdata/node/test_col2im_dilations/test_data_set_0/output_0.pb delete mode 100644 onnxruntime/test/python/testdata/node/test_col2im_pads/test_data_set_0/input_0.pb delete mode 100644 onnxruntime/test/python/testdata/node/test_col2im_pads/test_data_set_0/input_1.pb delete mode 100644 onnxruntime/test/python/testdata/node/test_col2im_pads/test_data_set_0/input_2.pb delete mode 100644 onnxruntime/test/python/testdata/node/test_col2im_pads/test_data_set_0/output_0.pb delete mode 100644 onnxruntime/test/python/testdata/node/test_col2im_strides/test_data_set_0/input_0.pb delete mode 100644 onnxruntime/test/python/testdata/node/test_col2im_strides/test_data_set_0/input_1.pb delete mode 100644 onnxruntime/test/python/testdata/node/test_col2im_strides/test_data_set_0/input_2.pb delete mode 100644 onnxruntime/test/python/testdata/node/test_col2im_strides/test_data_set_0/output_0.pb delete mode 100644 onnxruntime/test/python/testdata/test_col2im/test_data_set_0/input_0.pb delete mode 100644 onnxruntime/test/python/testdata/test_col2im/test_data_set_0/input_1.pb delete mode 100644 onnxruntime/test/python/testdata/test_col2im/test_data_set_0/input_2.pb delete mode 100644 onnxruntime/test/python/testdata/test_col2im/test_data_set_0/output_0.pb diff --git a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc index f05112b7a628a..15f8599f52534 100644 --- a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc +++ b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc @@ -955,194 +955,6 @@ ONNX_MS_OPERATOR_SET_SCHEMA(IsAllFinite, 1, updateOutputElemType(ctx, 0, ONNX_NAMESPACE::TensorProto::BOOL); })); -void col2imShapeInference(InferenceContext& ctx) { - propagateElemTypeFromInputToOutput(ctx, 0, 0); - - // All inputs shapes are required - if (!hasNInputShapes(ctx, 3)) { - return; - } - - // Assuming image_shape has correct spatial dimensions and reused for next validation steps - // An alternative is get the the number of spatial dimensions as an input - if (ctx.getInputType(1)->tensor_type().shape().dim_size() != 1) { - fail_shape_inference("image_shape tensor must have rank 1."); - } - size_t n_input_dims = static_cast(ctx.getInputType(1)->tensor_type().shape().dim(0).dim_value()); - std::vector image_shape = {}; - const TensorProto* image_shape_data = ctx.getInputData(1); - if (image_shape_data) { - image_shape = ParseData(image_shape_data); - if (image_shape.size() != n_input_dims) { - fail_shape_inference("image_shape tensor must have ", n_input_dims, " spatial dimensions."); - } - } - - std::vector pads = {}; - if (getRepeatedAttribute(ctx, "pads", pads)) { - if ((pads.size() != 0) && (pads.size() != n_input_dims * 2)) { - fail_shape_inference("Attribute pads has incorrect size"); - } - } - - std::vector dilations = {}; - if (getRepeatedAttribute(ctx, "dilations", dilations)) { - if ((dilations.size() != 0) && 
(dilations.size() != n_input_dims)) { - fail_shape_inference("Attribute dilations has incorrect size"); - } - } - - std::vector strides = {}; - if (getRepeatedAttribute(ctx, "strides", strides)) { - if ((strides.size() != 0) && (strides.size() != n_input_dims)) { - fail_shape_inference("Attribute strides has incorrect size"); - } - } - - auto input_shape = ctx.getInputType(0)->tensor_type().shape(); - if (input_shape.dim_size() != 3) { - fail_shape_inference("input must have rank 3."); - } - - std::vector block_shape = {}; - const TensorProto* block_shape_data = ctx.getInputData(2); - if (block_shape_data) { - block_shape = ParseData(block_shape_data); - if (block_shape.size() != n_input_dims) { - fail_shape_inference("block_shape tensor must have ", n_input_dims, " spatial dimensions."); - } - } - if (ctx.getInputType(2)->tensor_type().shape().dim_size() != 1) { - fail_shape_inference("block_shape tensor must have rank 1."); - } else if ( - (ctx.getInputType(2)->tensor_type().shape().dim(0).has_dim_value()) && - (ctx.getInputType(2)->tensor_type().shape().dim(0).dim_value() != static_cast(n_input_dims))) { - fail_shape_inference("block_shape tensor must have ", n_input_dims, " spatial dimensions."); - } - - int64_t block_shape_size = 0; - if (static_cast(block_shape.size()) > 0) { - block_shape_size = 1; - for (const auto& dim : block_shape) { - block_shape_size *= dim; - } - } - - // Final shape will be (N, C, dim_1, ..., dim_N) - auto final_image_shape = ctx.getOutputType(0)->mutable_tensor_type()->mutable_shape(); - - // Dimensions N and C are always present - Dim N, C; - if (ctx.getInputType(0)->tensor_type().shape().dim(0).has_dim_value()) { - N = input_shape.dim(0); // Otherwise, N is unknown. - } - *final_image_shape->add_dim() = N; - - if (block_shape_size > 0) { - C = input_shape.dim(1) / block_shape_size; // Otherwise, C is unknown. - } - *final_image_shape->add_dim() = C; - - // Image dimensions are dynamic - for (size_t i = 0; i < n_input_dims; ++i) { - Dim image_dim_i; - if (image_shape.size() > 0) { - image_dim_i.set_dim_value(image_shape[i]); // Otherwise, spatial dimensions are unknown - } - *final_image_shape->add_dim() = image_dim_i; - } - return; -} - -constexpr const char* Col2Im_ver1_doc = R"DOC( -The operator rearranges column blocks back into a multidimensional image - -Col2Im behaves similarly to PyTorch's fold https://pytorch.org/docs/stable/generated/torch.nn.Fold.html, -but it only supports *batched* multi-dimensional image tensors. - -NOTE: Although specifying image_shape looks redundant because it could be calculated from - convolution formulas, it is required as input for more advanced scenarios as explained - at PyTorch's implementation (https://github.com/pytorch/pytorch/blob/faac3dbce20a6068a3e530c11788896e81a73c64/aten/src/ATen/native/Col2Im.cpp#L10) - -)DOC"; - -ONNX_MS_OPERATOR_SET_SCHEMA(Col2Im, 1, - OpSchema() - .SetDoc(Col2Im_ver1_doc) - .Attr( - "dilations", - "1-dimensional tensor with dilation value along each spatial axis of the image. " - "If not present, the dilation defaults to 1 along each spatial axis of the image.", - AttributeProto::INTS, - OPTIONAL_VALUE) - .Attr( - "pads", - "1-dimensional tensor with padding value for the beginning and ending along each " - "spatial axis, it can take any value greater than or equal to 0. " - "The value represent the number of pixels added to the beginning " - "and end part of the corresponding axis. 
`pads` format should be as follow " - "[x1_begin, x2_begin...x1_end, x2_end,...], where xi_begin is the number of pixels " - "added at the beginning of axis `i` and xi_end the same for the end of axis `i`. " - "If not present, the padding defaults to 0 along start and end of each spatial axis.", - AttributeProto::INTS, - OPTIONAL_VALUE) - .Attr( - "strides", - "1-dimensional tensor with stride value along each spatial axis. " - "If not present, the stride defaults to 1 along each spatial axis.", - AttributeProto::INTS, - OPTIONAL_VALUE) - .Input( - 0, - "input", - "Input data tensor to be rearranged from column blocks back into an image. " - "This is a 3-dimensional tensor containing [N, C * n-ary-product(block_shape), L], " - "where N is batch dimension, C is image channel dimension and L is number of blocks.", - "T", - OpSchema::Single, - true, - 1, - OpSchema::Differentiable) - .Input( - 1, - "image_shape", - "The shape of the spatial dimensions of the image after rearranging the column blocks. " - "This is a 1-dim tensor with size of at least 2, containing the value [H_img, W_img] " - "for a 2-D image or [dim_i1, dim_i2, ..., dim_iN] for a N-D image.", - "tensor(int64)", - OpSchema::Single, - true, - 1, - OpSchema::NonDifferentiable) - .Input( - 2, - "block_shape", - "The shape of the block to apply on the input." - "This is a 1-dim tensor of size of at least 2, containing the value [H_block, W_block] " - "for a 2-D image or [dim_b1, dim_b2, ..., dim_bN] for a N-D block. " - "Dilations, pads and strides are applied to block_shape under the hood. " - "The kernel window start at the top-left of the block and slides to the right and down, " - "similarly to how Convolution kernels do.", - "tensor(int64)", - OpSchema::Single, - true, - 1, - OpSchema::NonDifferentiable) - .Output( - 0, - "output", - "Output tensor produced by rearranging blocks into an image.", - "T", - OpSchema::Single, - true, - 1, - OpSchema::Differentiable) - .TypeConstraint( - "T", - OpSchema::all_tensor_types_with_bfloat(), - "Constrain input and output types to all numeric tensor types.") - .TypeAndShapeInferenceFunction([](InferenceContext& ctx) { col2imShapeInference(ctx); })); - constexpr const char* GridSample_ver1_doc = R"DOC( Given an `input` and a flow-field `grid`, computes the `output` using `input` values and pixel locations from `grid`. Currently, only spatial (4-D) inputs are supported. 
For `input` with shape (N, C, H, W) and `grid` with shape (N, H_out, W_out, 2), diff --git a/onnxruntime/core/graph/contrib_ops/ms_opset.h b/onnxruntime/core/graph/contrib_ops/ms_opset.h index 538a0ce1081eb..1f0af31a4bdd0 100644 --- a/onnxruntime/core/graph/contrib_ops/ms_opset.h +++ b/onnxruntime/core/graph/contrib_ops/ms_opset.h @@ -68,7 +68,6 @@ class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, GatherND); class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, Gelu); class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, QuickGelu); class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, GreedySearch); -class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, Col2Im); class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, GridSample); class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, Inverse); class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Microsoft, 1, Irfft); @@ -155,7 +154,6 @@ class OpSet_Microsoft_ver1 { fn(GetOpSchema()); fn(GetOpSchema()); fn(GetOpSchema()); - fn(GetOpSchema()); fn(GetOpSchema()); fn(GetOpSchema()); fn(GetOpSchema()); diff --git a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc index d1ad5dc5b715f..08352b2fb88d0 100644 --- a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc +++ b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc @@ -830,6 +830,7 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, float, ReduceSumSquare); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, double, ReduceSumSquare); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, int32_t, ReduceSumSquare); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, Col2Im); // Opset 18 #if !defined(DISABLE_OPTIONAL_TYPE) class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, OptionalHasElement); @@ -2128,6 +2129,7 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) { ReduceSumSquare)>, BuildKernelCreateInfo, + BuildKernelCreateInfo, #if !defined(DISABLE_OPTIONAL_TYPE) BuildKernelCreateInfo, BuildKernelCreateInfo, diff --git a/onnxruntime/core/providers/cpu/tensor/col2im.cc b/onnxruntime/core/providers/cpu/tensor/col2im.cc index adb1d68b8727e..bf91aa82d31cb 100644 --- a/onnxruntime/core/providers/cpu/tensor/col2im.cc +++ b/onnxruntime/core/providers/cpu/tensor/col2im.cc @@ -6,15 +6,11 @@ namespace onnxruntime { -#define REGISTER_COL2IM_TYPED_KERNEL(OP_TYPE, VERSION, TYPE, KERNEL_CLASS) \ - ONNX_CPU_OPERATOR_TYPED_MS_KERNEL( \ - OP_TYPE, \ - VERSION, \ - TYPE, \ - KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), \ - KERNEL_CLASS); - -REGISTER_COL2IM_TYPED_KERNEL(Col2Im, 1, float, Col2Im); +ONNX_CPU_OPERATOR_KERNEL( + Col2Im, + 18, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + Col2Im); template Status Col2Im::Compute(OpKernelContext* context) const { @@ -22,13 +18,28 @@ Status Col2Im::Compute(OpKernelContext* context) const { const auto* image_shape = context->Input(1); const auto* kernel_shape = context->Input(2); + size_t image_dim_size = image_shape->Shape().Size(); + TensorShapeVector pads = col2im_attrs_.pads; + TensorShapeVector dilations = col2im_attrs_.dilations; + TensorShapeVector strides = col2im_attrs_.strides; + + if (dilations.empty()) { + dilations.resize(image_dim_size, 1); + } + if (pads.empty()) { + 
pads.resize(image_dim_size * 2, 0); + } + if (strides.empty()) { + strides.resize(image_dim_size, 1); + } + int64_t image_shape_size = 1; int64_t kernel_shape_size = 1; TensorShapeVector adjusted_kernel_shape_dims; for (auto i = 0; i < image_shape->Shape().Size(); ++i) { image_shape_size *= image_shape->Data()[i]; kernel_shape_size *= kernel_shape->Data()[i]; - adjusted_kernel_shape_dims.push_back(col2im_attrs_.dilations[i] * (kernel_shape->Data()[i] - 1) + 1); + adjusted_kernel_shape_dims.push_back(dilations[i] * (kernel_shape->Data()[i] - 1) + 1); } TensorShape col_shape = col_tensor->Shape(); const auto N = col_shape[0]; @@ -56,14 +67,14 @@ Status Col2Im::Compute(OpKernelContext* context) const { image_shape->Data()[1], kernel_shape->Data()[0], kernel_shape->Data()[1], - col2im_attrs_.dilations[0], - col2im_attrs_.dilations[1], - col2im_attrs_.pads[0], - col2im_attrs_.pads[1], - col2im_attrs_.pads[2], - col2im_attrs_.pads[3], - col2im_attrs_.strides[0], - col2im_attrs_.strides[1], + dilations[0], + dilations[1], + pads[0], + pads[1], + pads[2], + pads[3], + strides[0], + strides[1], image_data + image_id * col_stride, &CPUMathUtil::Instance()); } else { @@ -74,9 +85,9 @@ Status Col2Im::Compute(OpKernelContext* context) const { kernel_shape_size * C, image_shape_size, adjusted_kernel_shape.GetDims().data(), - col2im_attrs_.strides.data(), - col2im_attrs_.dilations.data(), - col2im_attrs_.pads.data(), + strides.data(), + dilations.data(), + pads.data(), image_shape->Shape().Size(), image_data + image_id * col_stride, &CPUMathUtil::Instance()); diff --git a/onnxruntime/core/providers/cpu/tensor/col2im.h b/onnxruntime/core/providers/cpu/tensor/col2im.h index b5849ecc9426b..50bdad46a72a0 100644 --- a/onnxruntime/core/providers/cpu/tensor/col2im.h +++ b/onnxruntime/core/providers/cpu/tensor/col2im.h @@ -3,6 +3,7 @@ #pragma once +#include "core/framework/op_kernel.h" #include "core/providers/cpu/tensor/col2im_attributes.h" namespace onnxruntime { diff --git a/onnxruntime/core/providers/cpu/tensor/col2im_attributes.h b/onnxruntime/core/providers/cpu/tensor/col2im_attributes.h index 49ddbe1cbb300..c8e01e396656a 100644 --- a/onnxruntime/core/providers/cpu/tensor/col2im_attributes.h +++ b/onnxruntime/core/providers/cpu/tensor/col2im_attributes.h @@ -28,20 +28,18 @@ namespace onnxruntime { struct Col2ImAttributes { - using Col2ImPadVector = InlinedVector; - explicit Col2ImAttributes(const OpKernelInfo& info) { - // Make sure empty strides, pads or dilations are defaulted to 1 if necessary - ORT_THROW_IF_ERROR(info.GetAttrs("strides", strides)); - gsl::span pads_span; - ORT_THROW_IF_ERROR(info.GetAttrsAsSpan("pads", pads_span)); - pads.assign(pads_span.cbegin(), pads_span.cend()); - ORT_THROW_IF_ERROR(info.GetAttrs("dilations", dilations)); + if (!info.GetAttrs("strides", strides).IsOK()) + ORT_ENFORCE(strides.empty()); + if (!info.GetAttrs("dilations", dilations).IsOK()) + ORT_ENFORCE(dilations.empty()); + if (!info.GetAttrs("pads", pads).IsOK()) + ORT_ENFORCE(pads.empty()); } ~Col2ImAttributes() = default; - Col2ImPadVector pads; + TensorShapeVector pads; TensorShapeVector dilations; TensorShapeVector strides; }; diff --git a/onnxruntime/python/tools/pytorch_export_contrib_ops.py b/onnxruntime/python/tools/pytorch_export_contrib_ops.py index 8b40b7fedb7c5..7df091df53dd5 100644 --- a/onnxruntime/python/tools/pytorch_export_contrib_ops.py +++ b/onnxruntime/python/tools/pytorch_export_contrib_ops.py @@ -1,5 +1,5 @@ -#Copyright(c) Microsoft Corporation.All rights reserved. 
-#Licensed under the MIT License. +# Copyright(c) Microsoft Corporation.All rights reserved. +# Licensed under the MIT License. """ Support for registering ONNX Runtime's built-in contrib ops with @@ -8,7 +8,7 @@ import typing try: -O(justinchuby) : Create a function to alert users when torch is not installed + # TODO(justinchuby): Create a function to alert users when torch is not installed import torch except ModuleNotFoundError: raise ModuleNotFoundError( @@ -48,12 +48,12 @@ def grid_sampler(g, input, grid, mode, padding_mode, align_corners): padding_mode_str = ["zeros", "border", "reflection"][padding_mode] align_corners = int(symbolic_helper._maybe_get_const(align_corners, "b")) -#From opset v13 onward, the output shape can be specified with -#(N, C, H, W)(N, H_out, W_out, 2) =>(N, C, H_out, W_out) -#input_shape = input.type().sizes() -#gird_shape = grid.type().sizes() -#output_shape = input_shape[ : 2] + gird_shape[1 : 3] -#g.op(...).setType(input.type().with_sizes(output_shape)) + # From opset v13 onward, the output shape can be specified with + # (N, C, H, W)(N, H_out, W_out, 2) =>(N, C, H_out, W_out) + # input_shape = input.type().sizes() + # gird_shape = grid.type().sizes() + # output_shape = input_shape[ : 2] + gird_shape[1 : 3] + # g.op(...).setType(input.type().with_sizes(output_shape)) return g.op( "com.microsoft::GridSample", @@ -73,7 +73,7 @@ def inverse(g, self): @torch.onnx.symbolic_helper.parse_args("v", "s") def gelu(g, self: torch._C.Value, approximate: str = "none"): -#Use microsoft::Gelu for performance if possible.It only supports approximate == "none" + # Use microsoft::Gelu for performance if possible.It only supports approximate == "none" if approximate == "none": return g.op("com.microsoft::Gelu", self).setType(self.type()) return torch.onnx.symbolic_opset9.gelu(g, self, approximate) diff --git a/onnxruntime/test/contrib_ops/col2im_test.cc b/onnxruntime/test/contrib_ops/col2im_test.cc deleted file mode 100644 index 3031975c0df2d..0000000000000 --- a/onnxruntime/test/contrib_ops/col2im_test.cc +++ /dev/null @@ -1,172 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. 
- -#include -#include "gtest/gtest.h" -#include "test/providers/provider_test_utils.h" - -using namespace onnxruntime::test; -#include "core/util/math.h" - -namespace onnxruntime { -namespace contrib { -namespace test { - -namespace { -template -std::vector TransposeSerializedVector(std::vector& input, size_t N, size_t C, size_t H, size_t W) { - size_t input_size = input.size(); - if (input_size == 0) { - throw std::runtime_error("Invalid input"); - } - std::vector trans_vec(input); - - for (size_t n = 0; n < N; ++n) - for (size_t c = 0; c < C; ++c) - for (size_t h = 0; h < H; ++h) - for (size_t w = 0; w < W; ++w) - trans_vec[n * (C * H * W) + c * (H * W) + (h + H * w)] = - input[n * (C * H * W) + c * (H * W) + (w + W * h)]; - - return trans_vec; -} - -} // namespace - -TEST(Col2ImContribOpTest, simple4dNCHW) { - OpTester test("Col2Im", 1, kMSDomain); - - test.AddAttribute("strides", std::vector{1, 1}); - test.AddAttribute("dilations", std::vector{1, 1}); - test.AddAttribute("pads", std::vector{0, 0, 0, 0}); - - std::vector input(25); - std::vector output(25); - std::iota(output.begin(), output.end(), 1.0f); - - input = TransposeSerializedVector(output, 1, 1, 5, 5); - test.AddInput("input", {1, 5, 5}, input); - test.AddInput("image_shape", {2}, std::vector{5, 5}); - test.AddInput("block_shape", {2}, std::vector{1, 5}); - - test.AddOutput("output", {1, 1, 5, 5}, output); - test.Run(); -} - -TEST(Col2ImContribOpTest, with2Images3channelsNonSquare4dNCHW) { - OpTester test("Col2Im", 1, kMSDomain); - - test.AddAttribute("strides", std::vector{1, 1}); - test.AddAttribute("dilations", std::vector{1, 1}); - test.AddAttribute("pads", std::vector{0, 0, 0, 0}); - - std::vector input(120); - std::vector output(120); - std::iota(output.begin(), output.end(), 1.0f); - input = TransposeSerializedVector(output, 2, 3, 4, 5); - test.AddInput("input", {2, 15, 4}, input); - test.AddInput("image_shape", {2}, std::vector{4, 5}); - test.AddInput("block_shape", {2}, std::vector{1, 5}); - - test.AddOutput("output", {2, 3, 4, 5}, output); - test.Run(); -} - -TEST(Col2ImContribOpTest, with2Images2channelsNonSquareDilationPadStride4dNCHW) { - OpTester test("Col2Im", 1, kMSDomain); - - test.AddAttribute("strides", std::vector{2, 2}); - test.AddAttribute("dilations", std::vector{2, 2}); - test.AddAttribute("pads", std::vector{2, 2, 2, 2}); - - std::vector input{0., 0., 0., 0., 0., 1., 3., 5., 0., 11., 13., 15., 0., 0., 0., 0., - 0., 0., 0., 0., 1., 3., 5., 0., 11., 13., 15., 0., 0., 0., 0., 0., - 0., 0., 0., 0., 0., 21., 23., 25., 0., 31., 33., 35., 0., 0., 0., 0., - 0., 0., 0., 0., 21., 23., 25., 0., 31., 33., 35., 0., 0., 0., 0., 0., - 0., 0., 0., 0., 0., 41., 43., 45., 0., 51., 53., 55., 0., 0., 0., 0., - 0., 0., 0., 0., 41., 43., 45., 0., 51., 53., 55., 0., 0., 0., 0., 0., - 0., 0., 0., 0., 0., 61., 63., 65., 0., 71., 73., 75., 0., 0., 0., 0., - 0., 0., 0., 0., 61., 63., 65., 0., 71., 73., 75., 0., 0., 0., 0., 0.}; - std::vector output{2., 0., 6., 0., 10., - 0., 0., 0., 0., 0., - 22., 0., 26., 0., 30., - 0., 0., 0., 0., 0., - 42., 0., 46., 0., 50., - 0., 0., 0., 0., 0., - 62., 0., 66., 0., 70., - 0., 0., 0., 0., 0., - 82., 0., 86., 0., 90., - 0., 0., 0., 0., 0., - 102., 0., 106., 0., 110., - 0., 0., 0., 0., 0., - 122., 0., 126., 0., 130., - 0., 0., 0., 0., 0., - 142., 0., 146., 0., 150., - 0., 0., 0., 0., 0.}; - test.AddInput("input", {2, 4, 16}, input); - test.AddInput("image_shape", {2}, std::vector{4, 5}); - test.AddInput("block_shape", {2}, std::vector{1, 2}); - - test.AddOutput("output", {2, 2, 4, 5}, output); 
- test.Run(); -} - -TEST(Col2ImContribOpTest, with3channels4dNCHW) { - OpTester test("Col2Im", 1, kMSDomain); - - test.AddAttribute("strides", std::vector{1, 1}); - test.AddAttribute("dilations", std::vector{1, 1}); - test.AddAttribute("pads", std::vector{0, 0, 0, 0}); - - std::vector input(75); - std::vector output(75); - std::iota(output.begin(), output.end(), 1.0f); - input = TransposeSerializedVector(output, 1, 3, 5, 5); - test.AddInput("input", {1, 15, 5}, input); - test.AddInput("image_shape", {2}, std::vector{5, 5}); - test.AddInput("block_shape", {2}, std::vector{1, 5}); - - test.AddOutput("output", {1, 3, 5, 5}, output); - test.Run(); -} - -TEST(Col2ImContribOpTest, with2Images3channels4dNCHW) { - OpTester test("Col2Im", 1, kMSDomain); - - test.AddAttribute("strides", std::vector{1, 1}); - test.AddAttribute("dilations", std::vector{1, 1}); - test.AddAttribute("pads", std::vector{0, 0, 0, 0}); - - std::vector input(150); - std::vector output(150); - std::iota(output.begin(), output.end(), 1.0f); - input = TransposeSerializedVector(output, 2, 3, 5, 5); - test.AddInput("input", {2, 15, 5}, input); - test.AddInput("image_shape", {2}, std::vector{5, 5}); - test.AddInput("block_shape", {2}, std::vector{1, 5}); - - test.AddOutput("output", {2, 3, 5, 5}, output); - test.Run(); -} - -TEST(Col2ImContribOpTest, simple5dNCHWD) { - OpTester test("Col2Im", 1, kMSDomain); - - test.AddAttribute("strides", std::vector{1, 1, 1}); - test.AddAttribute("dilations", std::vector{1, 1, 1}); - test.AddAttribute("pads", std::vector{0, 0, 0, 0, 0, 0}); - - std::vector input(25); - std::vector output(25); - std::iota(output.begin(), output.end(), 1.0f); - input = TransposeSerializedVector(output, 1, 1, 5, 5); - test.AddInput("input", {1, 5, 5}, input); - test.AddInput("image_shape", {3}, std::vector{1, 5, 5}); - test.AddInput("block_shape", {3}, std::vector{1, 1, 5}); - test.AddOutput("output", {1, 1, 1, 5, 5}, output); - test.Run(); -} - -} // namespace test -} // namespace contrib -} // namespace onnxruntime diff --git a/onnxruntime/test/python/contrib_ops/onnx_test_col2im.py b/onnxruntime/test/python/contrib_ops/onnx_test_col2im.py deleted file mode 100644 index 31c5f129fad6b..0000000000000 --- a/onnxruntime/test/python/contrib_ops/onnx_test_col2im.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. 
-# -# Test reference implementation and model for ONNX Runtime conrtib op trilu - -import unittest - -import numpy as np -import onnx -from onnx_contrib_ops_helper import expect - - -class ONNXReferenceImplementationTest(unittest.TestCase): - def test_col2im(self) -> None: - inputs = np.array( - [ - [ - [1.0, 6.0, 11.0, 16.0, 21.0], # (1, 5, 5) - [2.0, 7.0, 12.0, 17.0, 22.0], - [3.0, 8.0, 13.0, 18.0, 23.0], - [4.0, 9.0, 14.0, 19.0, 24.0], - [5.0, 0.0, 15.0, 20.0, 25.0], - ] - ] - ).astype(np.float32) - image_shape = np.array([5, 5]).astype(np.int64) - block_shape = np.array([1, 5]).astype(np.int64) - node = onnx.helper.make_node( - "Col2Im", ["input", "image_shape", "block_shape"], ["col2im_reference_implementation"] - ) - - col2im_reference_implementation = np.array( - [ - [ - [ - [1.0, 2.0, 3.0, 4.0, 5.0], # (1, 1, 5, 5) - [6.0, 7.0, 8.0, 9.0, 0.0], - [11.0, 12.0, 13.0, 14.0, 15.0], - [16.0, 17.0, 18.0, 19.0, 20.0], - [21.0, 22.0, 23.0, 24.0, 25.0], - ] - ] - ] - ).astype(np.float32) - - expect( - node, - inputs=[inputs, image_shape, block_shape], - outputs=[col2im_reference_implementation], - name="test_col2im", - ) - - -if __name__ == "__main__": - unittest.main(module=__name__, buffer=True) diff --git a/onnxruntime/test/python/testdata/node/test_col2im/test_data_set_0/input_0.pb b/onnxruntime/test/python/testdata/node/test_col2im/test_data_set_0/input_0.pb deleted file mode 100644 index 164166b2c84e8c0968a316c70ceb85e9b5fea07e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 117 zcmd;J@x2I3Qr0LGRS@&Et; diff --git a/onnxruntime/test/python/testdata/node/test_col2im/test_data_set_0/input_1.pb b/onnxruntime/test/python/testdata/node/test_col2im/test_data_set_0/input_1.pb deleted file mode 100644 index e2e47c174ce48b0b6cc775ccbad84426c3925a39..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 35 fcmd;J5@2`Y&dg0rPmM3mNGwS85@2P302mDbe=`L} diff --git a/onnxruntime/test/python/testdata/node/test_col2im/test_data_set_0/input_2.pb b/onnxruntime/test/python/testdata/node/test_col2im/test_data_set_0/input_2.pb deleted file mode 100644 index c0b7595628c4bb8bd1859c490f6242ca6bdbf7cc..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 35 gcmd;J5@2`YPRhwo&WQoO#oXPz20+Gu0V(5@+l l6vss*_afp583h$D#EV$`>v)F=GXgR;>^M+x;=+xJ$A70x6@35z diff --git a/onnxruntime/test/python/testdata/node/test_col2im_5d/test_data_set_0/input_0.pb b/onnxruntime/test/python/testdata/node/test_col2im_5d/test_data_set_0/input_0.pb deleted file mode 100644 index 0b66e3fbccc21c2a88060142326527a6fd6ca537..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 498 zcmWm4F(?FJ0EXfJLR~W4U@#aA27|$1Fev9TNQN5>27|#sNjID_7%oYYBuST~OVTAt zk}gS?q)U<{U6L+Il60fdV|ePT_lriN_0fh{BvCgpm6}cuRBA%V^n_51htSb$ljoja zVu=u1>0*#kf^UVmzQ_?}g4ReI-6R~B!C){L32vFx)ItN!|DNQDqHveCFrHrryWZMNHC!cM#FHff*z z4m#wpBaS-exD!sAHsg%5W}S211s7d%*%foHy5_nYZo1{Rd3W4(&w~3Nc<7PGo_K1} nGta%SCh$jHC c1Caj!hz)@30wA6N#0@|`14s_U2I2*d02b*GVE_OC diff --git a/onnxruntime/test/python/testdata/node/test_col2im_dilations/test_data_set_0/input_1.pb b/onnxruntime/test/python/testdata/node/test_col2im_dilations/test_data_set_0/input_1.pb deleted file mode 100644 index ed056b38ede071201a58c4d489ee72565a9de9e6..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 35 fcmd;J5@2`Y&dg0rPmM3mNGwS85@2J102mDbe@g{O diff --git a/onnxruntime/test/python/testdata/node/test_col2im_dilations/test_data_set_0/input_2.pb 
b/onnxruntime/test/python/testdata/node/test_col2im_dilations/test_data_set_0/input_2.pb deleted file mode 100644 index ea04f67ddf5b80dd13a9f42589cd7104b5e46f7a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 35 fcmd;J5@2`YPRhwo&Wm@T6DQ&$b=c0U)Xk;@xhu+-7W|J8_u|5#FQsW-_*9pD@(pf jbbB;7=A0W!?s;bJTeB^g^T`iex*CU^a6!Qx54_}m`4%=I diff --git a/onnxruntime/test/python/testdata/node/test_col2im_pads/test_data_set_0/input_1.pb b/onnxruntime/test/python/testdata/node/test_col2im_pads/test_data_set_0/input_1.pb deleted file mode 100644 index e2e47c174ce48b0b6cc775ccbad84426c3925a39..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 35 fcmd;J5@2`Y&dg0rPmM3mNGwS85@2P302mDbe=`L} diff --git a/onnxruntime/test/python/testdata/node/test_col2im_pads/test_data_set_0/input_2.pb b/onnxruntime/test/python/testdata/node/test_col2im_pads/test_data_set_0/input_2.pb deleted file mode 100644 index c0b7595628c4bb8bd1859c490f6242ca6bdbf7cc..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 35 gcmd;J5@2`YPRhwo&W&l+oNV1vjM r;ppbc&GjHDmpl$!=n)svf(<)5thn*ug~y4clu~5)pJ;^!Eu!)RQ05ig diff --git a/onnxruntime/test/python/testdata/node/test_col2im_strides/test_data_set_0/input_0.pb b/onnxruntime/test/python/testdata/node/test_col2im_strides/test_data_set_0/input_0.pb deleted file mode 100644 index f33a7620e97e8b2934587759212fbf1350d5effd..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 162 qcmd;J=_tH!59V*5I{E@RXG6prw@7n diff --git a/onnxruntime/test/python/testdata/node/test_col2im_strides/test_data_set_0/input_1.pb b/onnxruntime/test/python/testdata/node/test_col2im_strides/test_data_set_0/input_1.pb deleted file mode 100644 index e2e47c174ce48b0b6cc775ccbad84426c3925a39..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 35 fcmd;J5@2`Y&dg0rPmM3mNGwS85@2P302mDbe=`L} diff --git a/onnxruntime/test/python/testdata/node/test_col2im_strides/test_data_set_0/input_2.pb b/onnxruntime/test/python/testdata/node/test_col2im_strides/test_data_set_0/input_2.pb deleted file mode 100644 index 19b497c93ccceed2813a63a90e568d62835d8ed1..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 35 fcmd;J5@2`YPRhwo&W@x2I3Qr0LGRS@&Et; diff --git a/onnxruntime/test/python/testdata/test_col2im/test_data_set_0/input_1.pb b/onnxruntime/test/python/testdata/test_col2im/test_data_set_0/input_1.pb deleted file mode 100644 index e2e47c174ce48b0b6cc775ccbad84426c3925a39..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 35 fcmd;J5@2`Y&dg0rPmM3mNGwS85@2P302mDbe=`L} diff --git a/onnxruntime/test/python/testdata/test_col2im/test_data_set_0/input_2.pb b/onnxruntime/test/python/testdata/test_col2im/test_data_set_0/input_2.pb deleted file mode 100644 index c0b7595628c4bb8bd1859c490f6242ca6bdbf7cc..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 35 gcmd;J5@2`YPRhwo&WST6h+bR0?8#CWCg?mft0DT0YT(PvV;vn4B(QIG9@J?WlGA7!6@83-J@rX zH8!zHYHpXNYjfwTLEk!PXtZ*^Z`ErU>(O=L{t}5-5h(}}3w9ikQY6KH8DHRmCwfFo OnDN1a6&t?T@heYoXdiw6 From a867572ee3d49479ac4d2a012ad2abd738015119 Mon Sep 17 00:00:00 2001 From: Liqun Fu Date: Wed, 11 Jan 2023 16:42:02 -0800 Subject: [PATCH 19/30] remove undeeded changes Signed-off-by: Liqun Fu --- onnxruntime/contrib_ops/cpu/col2im.cc | 22 ------------------- .../contrib_ops/cpu/cpu_contrib_kernels.cc | 2 -- .../tools/pytorch_export_contrib_ops.py | 13 
++++------- 3 files changed, 4 insertions(+), 33 deletions(-) delete mode 100644 onnxruntime/contrib_ops/cpu/col2im.cc diff --git a/onnxruntime/contrib_ops/cpu/col2im.cc b/onnxruntime/contrib_ops/cpu/col2im.cc deleted file mode 100644 index d6ed5495e49aa..0000000000000 --- a/onnxruntime/contrib_ops/cpu/col2im.cc +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. -#include "core/providers/cpu/tensor/col2im.h" -#include "core/providers/common.h" - -namespace onnxruntime { -namespace contrib { - -#define REGISTER_KERNEL_TYPED(T) \ - ONNX_OPERATOR_TYPED_KERNEL_EX( \ - Col2Im, \ - kMSDomain, \ - 1, \ - T, \ - kCpuExecutionProvider, \ - KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), \ - Col2Im); - -REGISTER_KERNEL_TYPED(float) - -} // namespace contrib -} // namespace onnxruntime diff --git a/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc b/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc index 7c02f4055ac8f..a04ef0d71b113 100644 --- a/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc +++ b/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc @@ -10,7 +10,6 @@ namespace contrib { class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, SampleOp); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, Col2Im); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, GridSample); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, Attention); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, float, BeamSearch); @@ -193,7 +192,6 @@ Status RegisterCpuContribKernels(KernelRegistry& kernel_registry) { BuildKernelCreateInfo, // add more kernels here - BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, diff --git a/onnxruntime/python/tools/pytorch_export_contrib_ops.py b/onnxruntime/python/tools/pytorch_export_contrib_ops.py index 7df091df53dd5..d3c537035f6ab 100644 --- a/onnxruntime/python/tools/pytorch_export_contrib_ops.py +++ b/onnxruntime/python/tools/pytorch_export_contrib_ops.py @@ -1,4 +1,4 @@ -# Copyright(c) Microsoft Corporation.All rights reserved. +# Copyright(c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. """ @@ -49,10 +49,10 @@ def grid_sampler(g, input, grid, mode, padding_mode, align_corners): align_corners = int(symbolic_helper._maybe_get_const(align_corners, "b")) # From opset v13 onward, the output shape can be specified with - # (N, C, H, W)(N, H_out, W_out, 2) =>(N, C, H_out, W_out) + # (N, C, H, W) (N, H_out, W_out, 2) => (N, C, H_out, W_out) # input_shape = input.type().sizes() # gird_shape = grid.type().sizes() - # output_shape = input_shape[ : 2] + gird_shape[1 : 3] + # output_shape = input_shape[:2] + gird_shape[1:3] # g.op(...).setType(input.type().with_sizes(output_shape)) return g.op( @@ -73,7 +73,7 @@ def inverse(g, self): @torch.onnx.symbolic_helper.parse_args("v", "s") def gelu(g, self: torch._C.Value, approximate: str = "none"): - # Use microsoft::Gelu for performance if possible.It only supports approximate == "none" + # Use microsoft::Gelu for performance if possible. 
It only supports approximate == "none" if approximate == "none": return g.op("com.microsoft::Gelu", self).setType(self.type()) return torch.onnx.symbolic_opset9.gelu(g, self, approximate) @@ -90,11 +90,6 @@ def tril(g, self, diagonal): _reg(tril) - def col2im(g, self: torch._C.Value, image_shape, block_shape): - return g.op("com.microsoft::Col2Im", self, image_shape, block_shape) - - _reg(col2im) - def unregister(): """Unregister ONNX Runtime's built-in contrib ops.""" From 028b5550e20e31c957038b3042ae75f187de7ef5 Mon Sep 17 00:00:00 2001 From: Liqun Fu Date: Wed, 11 Jan 2023 22:30:19 -0800 Subject: [PATCH 20/30] fix col2imnd Signed-off-by: Liqun Fu --- .../core/providers/cpu/tensor/col2im.cc | 66 +++++++++++-------- .../core/providers/cpu/tensor/col2im.h | 13 +++- .../providers/cpu/tensor/col2im_attributes.h | 47 ------------- .../tools/pytorch_export_contrib_ops.py | 2 +- .../onnx_backend_test_series_filters.jsonc | 1 - 5 files changed, 51 insertions(+), 78 deletions(-) delete mode 100644 onnxruntime/core/providers/cpu/tensor/col2im_attributes.h diff --git a/onnxruntime/core/providers/cpu/tensor/col2im.cc b/onnxruntime/core/providers/cpu/tensor/col2im.cc index bf91aa82d31cb..c022fe77b6650 100644 --- a/onnxruntime/core/providers/cpu/tensor/col2im.cc +++ b/onnxruntime/core/providers/cpu/tensor/col2im.cc @@ -2,10 +2,12 @@ // Licensed under the MIT License. #include "core/providers/cpu/tensor/col2im.h" +#include "core/util/math.h" #include "core/util/math_cpuonly.h" namespace onnxruntime { +// math::Col2im and math::Col2imNd only support float data type ONNX_CPU_OPERATOR_KERNEL( Col2Im, 18, @@ -18,28 +20,40 @@ Status Col2Im::Compute(OpKernelContext* context) const { const auto* image_shape = context->Input(1); const auto* kernel_shape = context->Input(2); - size_t image_dim_size = image_shape->Shape().Size(); - TensorShapeVector pads = col2im_attrs_.pads; - TensorShapeVector dilations = col2im_attrs_.dilations; - TensorShapeVector strides = col2im_attrs_.strides; - - if (dilations.empty()) { - dilations.resize(image_dim_size, 1); + size_t image_dim_number = image_shape->Shape().Size(); + TensorShapeVector dilations; + if (dilations_.empty()) { + dilations.resize(image_dim_number, 1); + } else { + ORT_ENFORCE(dilations_.size() == image_dim_number, "size of 'dilations' attribute, if provided, should equal to the number of image dimmensions."); + dilations = dilations_; } - if (pads.empty()) { - pads.resize(image_dim_size * 2, 0); + + TensorShapeVector pads; + if (pads_.empty()) { + pads.resize(image_dim_number * 2, 0); + } else { + ORT_ENFORCE(pads_.size() == 2 * image_dim_number, "size of 'pads' attribute, if provided, should equal to twice the number of image dimmensions."); + pads = pads_; } - if (strides.empty()) { - strides.resize(image_dim_size, 1); + + TensorShapeVector strides; + if (strides_.empty()) { + strides.resize(image_dim_number, 1); + } else { + ORT_ENFORCE(strides_.size() == image_dim_number, "size of 'strides' attribute, if provided, should equal to the number of image dimmensions."); + strides = strides_; } int64_t image_shape_size = 1; int64_t kernel_shape_size = 1; TensorShapeVector adjusted_kernel_shape_dims; - for (auto i = 0; i < image_shape->Shape().Size(); ++i) { - image_shape_size *= image_shape->Data()[i]; - kernel_shape_size *= kernel_shape->Data()[i]; - adjusted_kernel_shape_dims.push_back(dilations[i] * (kernel_shape->Data()[i] - 1) + 1); + auto image_dims = image_shape->Data(); + auto kernel_dims = kernel_shape->Data(); + for (auto i = 0; i < image_dim_number; 
++i) { + image_shape_size *= image_dims[i]; + kernel_shape_size *= kernel_dims[i]; + adjusted_kernel_shape_dims.push_back(dilations[i] * (kernel_dims[i] - 1) + 1); } TensorShape col_shape = col_tensor->Shape(); const auto N = col_shape[0]; @@ -50,23 +64,23 @@ Status Col2Im::Compute(OpKernelContext* context) const { TensorShapeVector batched_image_shape_dims, adjusted_image_shape_dims; batched_image_shape_dims.insert(batched_image_shape_dims.begin(), {N, C}); - for (auto i = 0; i < image_shape->Shape()[0]; ++i) { - batched_image_shape_dims.push_back(image_shape->Data()[i]); - adjusted_image_shape_dims.push_back(image_shape->Data()[i] - adjusted_kernel_shape[i] + 1); + for (auto i = 0; i < image_dim_number; ++i) { + batched_image_shape_dims.push_back(image_dims[i]); + adjusted_image_shape_dims.push_back(image_dims[i] - adjusted_kernel_shape[i] + 1); } TensorShape batched_image_shape(batched_image_shape_dims); T* image_data = context->Output(0, batched_image_shape)->template MutableData(); const T* col_data = col_tensor->template Data(); for (auto image_id = 0; image_id < N; ++image_id) { - if (image_shape->Shape()[0] == 2) { + if (image_dim_number == 2) { math::Col2im( col_data + image_id * col_data_stride, C, - image_shape->Data()[0], - image_shape->Data()[1], - kernel_shape->Data()[0], - kernel_shape->Data()[1], + image_dims[0], + image_dims[1], + kernel_dims[0], + kernel_dims[1], dilations[0], dilations[1], pads[0], @@ -80,15 +94,15 @@ Status Col2Im::Compute(OpKernelContext* context) const { } else { math::Col2imNd( col_data + image_id * col_data_stride, - image_shape->Data(), + image_dims, adjusted_image_shape_dims.data(), kernel_shape_size * C, - image_shape_size, + image_shape_size * C, adjusted_kernel_shape.GetDims().data(), strides.data(), dilations.data(), pads.data(), - image_shape->Shape().Size(), + image_dim_number, image_data + image_id * col_stride, &CPUMathUtil::Instance()); } diff --git a/onnxruntime/core/providers/cpu/tensor/col2im.h b/onnxruntime/core/providers/cpu/tensor/col2im.h index 50bdad46a72a0..2f2894a7f22fc 100644 --- a/onnxruntime/core/providers/cpu/tensor/col2im.h +++ b/onnxruntime/core/providers/cpu/tensor/col2im.h @@ -4,20 +4,27 @@ #pragma once #include "core/framework/op_kernel.h" -#include "core/providers/cpu/tensor/col2im_attributes.h" namespace onnxruntime { template class Col2Im final : public OpKernel { public: - explicit Col2Im(const OpKernelInfo& info) : OpKernel(info), col2im_attrs_(info) { + explicit Col2Im(const OpKernelInfo& info) : OpKernel(info) { + if (!info.GetAttrs("strides", strides_).IsOK()) + ORT_ENFORCE(strides_.empty()); + if (!info.GetAttrs("dilations", dilations_).IsOK()) + ORT_ENFORCE(dilations_.empty()); + if (!info.GetAttrs("pads", pads_).IsOK()) + ORT_ENFORCE(pads_.empty()); } Status Compute(OpKernelContext* context) const override; private: - Col2ImAttributes col2im_attrs_; + TensorShapeVector pads_; + TensorShapeVector dilations_; + TensorShapeVector strides_; }; } // namespace onnxruntime diff --git a/onnxruntime/core/providers/cpu/tensor/col2im_attributes.h b/onnxruntime/core/providers/cpu/tensor/col2im_attributes.h deleted file mode 100644 index c8e01e396656a..0000000000000 --- a/onnxruntime/core/providers/cpu/tensor/col2im_attributes.h +++ /dev/null @@ -1,47 +0,0 @@ -/** -* Copyright (c) 2016-present, Facebook, Inc. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. 
-* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ -/* Modifications Copyright (c) Microsoft. */ - -#pragma once - -#include "core/common/common.h" -#include "core/providers/common.h" -#include "core/util/math.h" - -#include "core/common/inlined_containers.h" -#include "core/framework/op_kernel.h" -#include "core/framework/op_node_proto_helper.h" - -namespace onnxruntime { - -struct Col2ImAttributes { - explicit Col2ImAttributes(const OpKernelInfo& info) { - if (!info.GetAttrs("strides", strides).IsOK()) - ORT_ENFORCE(strides.empty()); - if (!info.GetAttrs("dilations", dilations).IsOK()) - ORT_ENFORCE(dilations.empty()); - if (!info.GetAttrs("pads", pads).IsOK()) - ORT_ENFORCE(pads.empty()); - } - - ~Col2ImAttributes() = default; - - TensorShapeVector pads; - TensorShapeVector dilations; - TensorShapeVector strides; -}; - -} // namespace onnxruntime diff --git a/onnxruntime/python/tools/pytorch_export_contrib_ops.py b/onnxruntime/python/tools/pytorch_export_contrib_ops.py index d3c537035f6ab..9af57eda6ae90 100644 --- a/onnxruntime/python/tools/pytorch_export_contrib_ops.py +++ b/onnxruntime/python/tools/pytorch_export_contrib_ops.py @@ -1,4 +1,4 @@ -# Copyright(c) Microsoft Corporation. All rights reserved. +# Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. """ diff --git a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc index 6d49d33e94841..503ca8de0eba4 100644 --- a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc +++ b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc @@ -122,7 +122,6 @@ "^test_center_crop_pad_*", "^test_clip_default_int8_max_expanded_cpu", "^test_clip_default_int8_min_expanded_cpu", - "^test_col2im_*", "^test_constant_pad_axes_cpu", "^test_constant_pad_cpu", "^test_edge_pad_cpu", From ffa42f22bae91beebd6fad6cde3142ce01349f50 Mon Sep 17 00:00:00 2001 From: Thiago Crepaldi Date: Wed, 18 Jan 2023 13:12:32 -0500 Subject: [PATCH 21/30] Linting --- onnxruntime/core/providers/cpu/tensor/col2im.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onnxruntime/core/providers/cpu/tensor/col2im.cc b/onnxruntime/core/providers/cpu/tensor/col2im.cc index c022fe77b6650..013c2d1f7933b 100644 --- a/onnxruntime/core/providers/cpu/tensor/col2im.cc +++ b/onnxruntime/core/providers/cpu/tensor/col2im.cc @@ -50,7 +50,7 @@ Status Col2Im::Compute(OpKernelContext* context) const { TensorShapeVector adjusted_kernel_shape_dims; auto image_dims = image_shape->Data(); auto kernel_dims = kernel_shape->Data(); - for (auto i = 0; i < image_dim_number; ++i) { + for (size_t i = 0; i < image_dim_number; ++i) { image_shape_size *= image_dims[i]; kernel_shape_size *= kernel_dims[i]; adjusted_kernel_shape_dims.push_back(dilations[i] * (kernel_dims[i] - 1) + 1); @@ -64,7 +64,7 @@ Status Col2Im::Compute(OpKernelContext* context) const { TensorShapeVector batched_image_shape_dims, adjusted_image_shape_dims; batched_image_shape_dims.insert(batched_image_shape_dims.begin(), {N, C}); - for (auto i = 0; i < image_dim_number; ++i) { + for (size_t i = 0; i < image_dim_number; 
++i) { batched_image_shape_dims.push_back(image_dims[i]); adjusted_image_shape_dims.push_back(image_dims[i] - adjusted_kernel_shape[i] + 1); } From c1a77f9cd60e8280a0d14e4019d8c50ba5ba18d1 Mon Sep 17 00:00:00 2001 From: Liqun Fu Date: Thu, 19 Jan 2023 15:01:45 -0800 Subject: [PATCH 22/30] to use narrow cast Signed-off-by: Liqun Fu --- onnxruntime/core/providers/cpu/tensor/col2im.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/core/providers/cpu/tensor/col2im.cc b/onnxruntime/core/providers/cpu/tensor/col2im.cc index 013c2d1f7933b..b2e7d1c8e0bad 100644 --- a/onnxruntime/core/providers/cpu/tensor/col2im.cc +++ b/onnxruntime/core/providers/cpu/tensor/col2im.cc @@ -20,7 +20,7 @@ Status Col2Im::Compute(OpKernelContext* context) const { const auto* image_shape = context->Input(1); const auto* kernel_shape = context->Input(2); - size_t image_dim_number = image_shape->Shape().Size(); + size_t image_dim_number = onnxruntime::narrow(image_shape->Shape().Size()); TensorShapeVector dilations; if (dilations_.empty()) { dilations.resize(image_dim_number, 1); From ac4e2f17702661a790cd6defd67e567eb4d66798 Mon Sep 17 00:00:00 2001 From: Liqun Fu Date: Thu, 19 Jan 2023 15:16:46 -0800 Subject: [PATCH 23/30] remove test_col2im_pads Signed-off-by: Liqun Fu --- onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc | 1 + 1 file changed, 1 insertion(+) diff --git a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc index d5bf210804f18..3d937e87ee712 100644 --- a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc +++ b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc @@ -102,6 +102,7 @@ "^test_if_opt", "^test_loop16_seq_none", "^test_identity_opt", + "^test_col2im_pads*", // remove this when using ONNX with this: https://github.com/onnx/onnx/pull/4769 // Following tests are for opset 16 ops and are not yet implemented in ORT "^test_roialign_aligned_*", //GPU failures From a44b07dbb418181f80963c5c4b34dc2adafd543a Mon Sep 17 00:00:00 2001 From: Liqun Fu Date: Thu, 19 Jan 2023 18:47:29 -0800 Subject: [PATCH 24/30] disable android col2im_pads tests, update doc Signed-off-by: Liqun Fu --- docs/ContribOperators.md | 54 ----------------------------------- docs/OperatorKernels.md | 2 +- onnxruntime/test/onnx/main.cc | 1 + 3 files changed, 2 insertions(+), 55 deletions(-) diff --git a/docs/ContribOperators.md b/docs/ContribOperators.md index 55d63ea551ce3..7f327c80cf989 100644 --- a/docs/ContribOperators.md +++ b/docs/ContribOperators.md @@ -13,7 +13,6 @@ Do not modify directly.* * com.microsoft.BitmaskBiasDropout * com.microsoft.BitmaskDropout * com.microsoft.CDist - * com.microsoft.Col2Im * com.microsoft.ComplexMul * com.microsoft.ComplexMulConj * com.microsoft.ConvTransposeWithDynamicPads @@ -795,59 +794,6 @@ This version of the operator has been available since version 1 of the 'com.micr -### **com.microsoft.Col2Im** - - The operator rearranges column blocks back into a multidimensional image - - Col2Im behaves similarly to PyTorch's fold https://pytorch.org/docs/stable/generated/torch.nn.Fold.html, - but it only supports *batched* multi-dimensional image tensors. 
-
- NOTE: Although specifying image_shape looks redundant because it could be calculated from
-       convolution formulas, it is required as input for more advanced scenarios as explained
-       at PyTorch's implementation (https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/native/Col2Im.cpp#L10)
-
-
-#### Version
-
-This version of the operator has been available since version 1 of the 'com.microsoft' operator set.
-
-#### Attributes
-
-<dl>
-<dt><tt>dilations</tt> : list of ints</dt>
-<dd>1-dimensional tensor with dilation value along each spatial axis of the image. If not present, the dilation defaults to 1 along each spatial axis of the image.</dd>
-<dt><tt>pads</tt> : list of ints</dt>
-<dd>1-dimensional tensor with padding value for the beginning and ending along each spatial axis, it can take any value greater than or equal to 0. The value represent the number of pixels added to the beginning and end part of the corresponding axis. `pads` format should be as follow [x1_begin, x2_begin...x1_end, x2_end,...], where xi_begin is the number of pixels added at the beginning of axis `i` and xi_end the same for the end of axis `i`. If not present, the padding defaults to 0 along start and end of each spatial axis.</dd>
-<dt><tt>strides</tt> : list of ints</dt>
-<dd>1-dimensional tensor with stride value along each spatial axis. If not present, the stride defaults to 1 along each spatial axis.</dd>
-</dl>
-
-#### Inputs
-
-<dl>
-<dt><tt>input</tt> : T</dt>
-<dd>Input data tensor to be rearranged from column blocks back into an image. This is a 3-dimensional tensor containing [N, C * n-ary-product(block_shape), L], where N is batch dimension, C is image channel dimension and L is number of blocks.</dd>
-<dt><tt>image_shape</tt> : tensor(int64)</dt>
-<dd>The shape of the spatial dimensions of the image after rearranging the column blocks. This is a 1-dim tensor with size of at least 2, containing the value [H_img, W_img] for a 2-D image or [dim_i1, dim_i2, ..., dim_iN] for a N-D image.</dd>
-<dt><tt>block_shape</tt> : tensor(int64)</dt>
-<dd>The shape of the block to apply on the input. This is a 1-dim tensor of size of at least 2, containing the value [H_block, W_block] for a 2-D image or [dim_b1, dim_b2, ..., dim_bN] for a N-D block. Dilations, pads and strides are applied to block_shape under the hood. The kernel window start at the top-left of the block and slides to the right and down, similarly to how Convolution kernels do.</dd>
-</dl>
-
-#### Outputs
-
-<dl>
-<dt><tt>output</tt> : T</dt>
-<dd>Output tensor produced by rearranging blocks into an image.</dd>
-</dl>
-
-#### Type Constraints
-
-<dl>
-<dt><tt>T</tt> : tensor(uint8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(int8), tensor(int16), tensor(int32), tensor(int64), tensor(bfloat16), tensor(float16), tensor(float), tensor(double), tensor(string), tensor(bool), tensor(complex64), tensor(complex128)</dt>
-<dd>Constrain input and output types to all numeric tensor types.</dd>
-</dl>
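(For a concrete instance of the shape contract documented above, using values that appear in the with2Images3channelsNonSquare4dNCHW unit test elsewhere in this series: an input of shape [2, 15, 4] with image_shape = [4, 5] and block_shape = [1, 5] gives N = 2, C = 15 / (1 * 5) = 3 and L = 4 blocks, so the rearranged output has shape [2, 3, 4, 5].)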
-
-
 ### **com.microsoft.ComplexMul**
 
 #### Version
diff --git a/docs/OperatorKernels.md b/docs/OperatorKernels.md
index 1090d55129039..66f441e6db0f6 100644
--- a/docs/OperatorKernels.md
+++ b/docs/OperatorKernels.md
@@ -54,6 +54,7 @@ Do not modify directly.*
 |||12|**T** = tensor(double), tensor(float), tensor(int64), tensor(int8), tensor(uint64), tensor(uint8)|
 |||11|**T** = tensor(float)|
 |||[6, 10]|**T** = tensor(float)|
+|Col2Im|*in* input:**T**<br> *in* image_shape:**tensor(int64)**<br> *in* block_shape:**tensor(int64)**<br> *out* output:**T**|18+|**T** = tensor(float)|
 |Compress|*in* input:**T**<br> *in* condition:**T1**<br> *out* output:**T**|11+|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)<br> **T1** = tensor(bool)|
 |||[9, 10]|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)<br> **T1** = tensor(bool)|
 |Concat|*in* inputs:**T**<br> *out* concat_result:**T**|13+|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)|
@@ -416,7 +417,6 @@ Do not modify directly.*
 |BiasGelu|*in* A:**T**<br> *in* B:**T**<br> *out* C:**T**|1+|**T** = tensor(float)|
 |BifurcationDetector|*in* src_tokens:**T**<br> *in* cur_tokens:**T**<br> *in* prev_suffix_match_idx:**T**<br> *in* pred_tokens:**T**<br> *out* tokens:**T**<br> *out* suffix_match_idx:**T**|1+|**T** = tensor(int64)|
 |CDist|*in* A:**T**<br> *in* B:**T**<br> *out* C:**T**|1+|**T** = tensor(double), tensor(float)|
-|Col2Im|*in* input:**T**<br> *in* image_shape:**tensor(int64)**<br> *in* block_shape:**tensor(int64)**<br> *out* output:**T**|1+|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)|
 |ConvTransposeWithDynamicPads|*in* X:**T**<br> *in* W:**T**<br> *in* Pads:**tensor(int64)**<br> *in* B:**T**<br> *out* Y:**T**|1+|**T** = tensor(float)|
 |CropAndResize|*in* X:**T1**<br> *in* rois:**T1**<br> *in* batch_indices:**T2**<br> *in* crop_size:**T2**<br> *out* Y:**T1**|1+|**T1** = tensor(float)<br> **T2** = tensor(int32)|
 |DequantizeLinear|*in* x:**T1**<br> *in* x_scale:**T2**<br> *in* x_zero_point:**T1**<br> *out* y:**T2**|1+|**T1** = tensor(int8), tensor(uint8)<br>
**T2** = tensor(float)| diff --git a/onnxruntime/test/onnx/main.cc b/onnxruntime/test/onnx/main.cc index 82298db8191ae..fecc9bf8320d4 100644 --- a/onnxruntime/test/onnx/main.cc +++ b/onnxruntime/test/onnx/main.cc @@ -685,6 +685,7 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)"); {"test_scatternd_add", "Opset 16 not supported yet."}, {"test_scatternd_multiply", "Opset 16 not supported yet."}, {"test_scatter_elements_with_duplicate_indices", "Opset 16 not supported yet."}, + {"test_col2im_pad", "onnx 18 test data error."}, #if defined(DISABLE_OPTIONAL_TYPE) {"test_optional_get_element", "Optional type not supported in this build flavor."}, From ab8ec1d60509b7dfcc5e1b99445bd63a2b997dbe Mon Sep 17 00:00:00 2001 From: Liqun Fu Date: Fri, 20 Jan 2023 10:05:45 -0800 Subject: [PATCH 25/30] typo Signed-off-by: Liqun Fu --- onnxruntime/test/onnx/main.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/test/onnx/main.cc b/onnxruntime/test/onnx/main.cc index fecc9bf8320d4..5c099c18b041c 100644 --- a/onnxruntime/test/onnx/main.cc +++ b/onnxruntime/test/onnx/main.cc @@ -685,7 +685,7 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)"); {"test_scatternd_add", "Opset 16 not supported yet."}, {"test_scatternd_multiply", "Opset 16 not supported yet."}, {"test_scatter_elements_with_duplicate_indices", "Opset 16 not supported yet."}, - {"test_col2im_pad", "onnx 18 test data error."}, + {"test_col2im_pads", "onnx 18 test data error."}, #if defined(DISABLE_OPTIONAL_TYPE) {"test_optional_get_element", "Optional type not supported in this build flavor."}, From 7dc5f1ce9e79f90a5a4664ff662e7120df98257a Mon Sep 17 00:00:00 2001 From: Liqun Fu Date: Fri, 20 Jan 2023 11:38:08 -0800 Subject: [PATCH 26/30] col2im_pads Signed-off-by: Liqun Fu --- onnxruntime/test/onnx/main.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/onnxruntime/test/onnx/main.cc b/onnxruntime/test/onnx/main.cc index 5c099c18b041c..922c8a4c5047a 100644 --- a/onnxruntime/test/onnx/main.cc +++ b/onnxruntime/test/onnx/main.cc @@ -686,6 +686,7 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)"); {"test_scatternd_multiply", "Opset 16 not supported yet."}, {"test_scatter_elements_with_duplicate_indices", "Opset 16 not supported yet."}, {"test_col2im_pads", "onnx 18 test data error."}, + {"col2im_pads", "onnx 18 test data error."}, #if defined(DISABLE_OPTIONAL_TYPE) {"test_optional_get_element", "Optional type not supported in this build flavor."}, From ac500b6ba03161f48b2973365bd7c5d7bd0aa2d1 Mon Sep 17 00:00:00 2001 From: Liqun Fu Date: Fri, 20 Jan 2023 12:26:16 -0800 Subject: [PATCH 27/30] remove test_col2im_pads Signed-off-by: Liqun Fu --- onnxruntime/test/onnx/main.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/onnxruntime/test/onnx/main.cc b/onnxruntime/test/onnx/main.cc index 922c8a4c5047a..11d635519dfe7 100644 --- a/onnxruntime/test/onnx/main.cc +++ b/onnxruntime/test/onnx/main.cc @@ -685,7 +685,6 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. 
\n)"); {"test_scatternd_add", "Opset 16 not supported yet."}, {"test_scatternd_multiply", "Opset 16 not supported yet."}, {"test_scatter_elements_with_duplicate_indices", "Opset 16 not supported yet."}, - {"test_col2im_pads", "onnx 18 test data error."}, {"col2im_pads", "onnx 18 test data error."}, #if defined(DISABLE_OPTIONAL_TYPE) From 00b5555b7145cb1918d3d83bb7506b474a36f5b5 Mon Sep 17 00:00:00 2001 From: Liqun Fu Date: Mon, 23 Jan 2023 13:29:12 -0800 Subject: [PATCH 28/30] bring back test Signed-off-by: Liqun Fu --- .../test/providers/cpu/tensor/col2im_test.cc | 169 ++++++++++++++++++ 1 file changed, 169 insertions(+) create mode 100644 onnxruntime/test/providers/cpu/tensor/col2im_test.cc diff --git a/onnxruntime/test/providers/cpu/tensor/col2im_test.cc b/onnxruntime/test/providers/cpu/tensor/col2im_test.cc new file mode 100644 index 0000000000000..3a4539024e5a9 --- /dev/null +++ b/onnxruntime/test/providers/cpu/tensor/col2im_test.cc @@ -0,0 +1,169 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include +#include "gtest/gtest.h" +#include "test/providers/provider_test_utils.h" + +#include "core/util/math.h" + +namespace onnxruntime { +namespace test { + +namespace { +template +std::vector TransposeSerializedVector(std::vector& input, size_t N, size_t C, size_t H, size_t W) { + size_t input_size = input.size(); + if (input_size == 0) { + throw std::runtime_error("Invalid input"); + } + std::vector trans_vec(input); + + for (size_t n = 0; n < N; ++n) + for (size_t c = 0; c < C; ++c) + for (size_t h = 0; h < H; ++h) + for (size_t w = 0; w < W; ++w) + trans_vec[n * (C * H * W) + c * (H * W) + (h + H * w)] = + input[n * (C * H * W) + c * (H * W) + (w + W * h)]; + + return trans_vec; +} + +} // namespace + +TEST(Col2ImOpTest, Simple4dNCHW) { + OpTester test("Col2Im", 18); + + test.AddAttribute("strides", std::vector{1, 1}); + test.AddAttribute("dilations", std::vector{1, 1}); + test.AddAttribute("pads", std::vector{0, 0, 0, 0}); + + std::vector input(25); + std::vector output(25); + std::iota(output.begin(), output.end(), 1.0f); + + input = TransposeSerializedVector(output, 1, 1, 5, 5); + test.AddInput("input", {1, 5, 5}, input); + test.AddInput("image_shape", {2}, std::vector{5, 5}); + test.AddInput("block_shape", {2}, std::vector{1, 5}); + + test.AddOutput("output", {1, 1, 5, 5}, output); + test.Run(); +} + +TEST(Col2ImOpTest, With2Images3channelsNonSquare4dNCHW) { + OpTester test("Col2Im", 18); + + test.AddAttribute("strides", std::vector{1, 1}); + test.AddAttribute("dilations", std::vector{1, 1}); + test.AddAttribute("pads", std::vector{0, 0, 0, 0}); + + std::vector input(120); + std::vector output(120); + std::iota(output.begin(), output.end(), 1.0f); + input = TransposeSerializedVector(output, 2, 3, 4, 5); + test.AddInput("input", {2, 15, 4}, input); + test.AddInput("image_shape", {2}, std::vector{4, 5}); + test.AddInput("block_shape", {2}, std::vector{1, 5}); + + test.AddOutput("output", {2, 3, 4, 5}, output); + test.Run(); +} + +TEST(Col2ImOpTest, With2Images2channelsNonSquareDilationPadStride4dNCHW) { + OpTester test("Col2Im", 18); + + test.AddAttribute("strides", std::vector{2, 2}); + test.AddAttribute("dilations", std::vector{2, 2}); + test.AddAttribute("pads", std::vector{2, 2, 2, 2}); + + std::vector input{0., 0., 0., 0., 0., 1., 3., 5., 0., 11., 13., 15., 0., 0., 0., 0., + 0., 0., 0., 0., 1., 3., 5., 0., 11., 13., 15., 0., 0., 0., 0., 0., + 0., 0., 0., 0., 0., 21., 23., 25., 0., 31., 33., 35., 0., 0., 0., 0., + 0., 0., 
0., 0., 21., 23., 25., 0., 31., 33., 35., 0., 0., 0., 0., 0., + 0., 0., 0., 0., 0., 41., 43., 45., 0., 51., 53., 55., 0., 0., 0., 0., + 0., 0., 0., 0., 41., 43., 45., 0., 51., 53., 55., 0., 0., 0., 0., 0., + 0., 0., 0., 0., 0., 61., 63., 65., 0., 71., 73., 75., 0., 0., 0., 0., + 0., 0., 0., 0., 61., 63., 65., 0., 71., 73., 75., 0., 0., 0., 0., 0.}; + std::vector output{2., 0., 6., 0., 10., + 0., 0., 0., 0., 0., + 22., 0., 26., 0., 30., + 0., 0., 0., 0., 0., + 42., 0., 46., 0., 50., + 0., 0., 0., 0., 0., + 62., 0., 66., 0., 70., + 0., 0., 0., 0., 0., + 82., 0., 86., 0., 90., + 0., 0., 0., 0., 0., + 102., 0., 106., 0., 110., + 0., 0., 0., 0., 0., + 122., 0., 126., 0., 130., + 0., 0., 0., 0., 0., + 142., 0., 146., 0., 150., + 0., 0., 0., 0., 0.}; + test.AddInput("input", {2, 4, 16}, input); + test.AddInput("image_shape", {2}, std::vector{4, 5}); + test.AddInput("block_shape", {2}, std::vector{1, 2}); + + test.AddOutput("output", {2, 2, 4, 5}, output); + test.Run(); +} + +TEST(Col2ImOpTest, With3channels4dNCHW) { + OpTester test("Col2Im", 18); + + test.AddAttribute("strides", std::vector{1, 1}); + test.AddAttribute("dilations", std::vector{1, 1}); + test.AddAttribute("pads", std::vector{0, 0, 0, 0}); + + std::vector input(75); + std::vector output(75); + std::iota(output.begin(), output.end(), 1.0f); + input = TransposeSerializedVector(output, 1, 3, 5, 5); + test.AddInput("input", {1, 15, 5}, input); + test.AddInput("image_shape", {2}, std::vector{5, 5}); + test.AddInput("block_shape", {2}, std::vector{1, 5}); + + test.AddOutput("output", {1, 3, 5, 5}, output); + test.Run(); +} + +TEST(Col2ImOpTest, With2Images3channels4dNCHW) { + OpTester test("Col2Im", 18); + + test.AddAttribute("strides", std::vector{1, 1}); + test.AddAttribute("dilations", std::vector{1, 1}); + test.AddAttribute("pads", std::vector{0, 0, 0, 0}); + + std::vector input(150); + std::vector output(150); + std::iota(output.begin(), output.end(), 1.0f); + input = TransposeSerializedVector(output, 2, 3, 5, 5); + test.AddInput("input", {2, 15, 5}, input); + test.AddInput("image_shape", {2}, std::vector{5, 5}); + test.AddInput("block_shape", {2}, std::vector{1, 5}); + + test.AddOutput("output", {2, 3, 5, 5}, output); + test.Run(); +} + +TEST(Col2ImOpTest, Simple5dNCHWD) { + OpTester test("Col2Im", 18); + + test.AddAttribute("strides", std::vector{1, 1, 1}); + test.AddAttribute("dilations", std::vector{1, 1, 1}); + test.AddAttribute("pads", std::vector{0, 0, 0, 0, 0, 0}); + + std::vector input(25); + std::vector output(25); + std::iota(output.begin(), output.end(), 1.0f); + input = TransposeSerializedVector(output, 1, 1, 5, 5); + test.AddInput("input", {1, 5, 5}, input); + test.AddInput("image_shape", {3}, std::vector{1, 5, 5}); + test.AddInput("block_shape", {3}, std::vector{1, 1, 5}); + test.AddOutput("output", {1, 1, 1, 5, 5}, output); + test.Run(); +} + +} // namespace test +} // namespace onnxruntime From 7a8b8bcbea17a2281ad33e49be72e99012d55c0f Mon Sep 17 00:00:00 2001 From: Liqun Fu Date: Tue, 24 Jan 2023 12:22:05 -0800 Subject: [PATCH 29/30] remove col2im_test.cc to experiment React Native CI Signed-off-by: Liqun Fu --- .../test/providers/cpu/tensor/col2im_test.cc | 169 ------------------ 1 file changed, 169 deletions(-) delete mode 100644 onnxruntime/test/providers/cpu/tensor/col2im_test.cc diff --git a/onnxruntime/test/providers/cpu/tensor/col2im_test.cc b/onnxruntime/test/providers/cpu/tensor/col2im_test.cc deleted file mode 100644 index 3a4539024e5a9..0000000000000 --- 
a/onnxruntime/test/providers/cpu/tensor/col2im_test.cc +++ /dev/null @@ -1,169 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -#include -#include "gtest/gtest.h" -#include "test/providers/provider_test_utils.h" - -#include "core/util/math.h" - -namespace onnxruntime { -namespace test { - -namespace { -template -std::vector TransposeSerializedVector(std::vector& input, size_t N, size_t C, size_t H, size_t W) { - size_t input_size = input.size(); - if (input_size == 0) { - throw std::runtime_error("Invalid input"); - } - std::vector trans_vec(input); - - for (size_t n = 0; n < N; ++n) - for (size_t c = 0; c < C; ++c) - for (size_t h = 0; h < H; ++h) - for (size_t w = 0; w < W; ++w) - trans_vec[n * (C * H * W) + c * (H * W) + (h + H * w)] = - input[n * (C * H * W) + c * (H * W) + (w + W * h)]; - - return trans_vec; -} - -} // namespace - -TEST(Col2ImOpTest, Simple4dNCHW) { - OpTester test("Col2Im", 18); - - test.AddAttribute("strides", std::vector{1, 1}); - test.AddAttribute("dilations", std::vector{1, 1}); - test.AddAttribute("pads", std::vector{0, 0, 0, 0}); - - std::vector input(25); - std::vector output(25); - std::iota(output.begin(), output.end(), 1.0f); - - input = TransposeSerializedVector(output, 1, 1, 5, 5); - test.AddInput("input", {1, 5, 5}, input); - test.AddInput("image_shape", {2}, std::vector{5, 5}); - test.AddInput("block_shape", {2}, std::vector{1, 5}); - - test.AddOutput("output", {1, 1, 5, 5}, output); - test.Run(); -} - -TEST(Col2ImOpTest, With2Images3channelsNonSquare4dNCHW) { - OpTester test("Col2Im", 18); - - test.AddAttribute("strides", std::vector{1, 1}); - test.AddAttribute("dilations", std::vector{1, 1}); - test.AddAttribute("pads", std::vector{0, 0, 0, 0}); - - std::vector input(120); - std::vector output(120); - std::iota(output.begin(), output.end(), 1.0f); - input = TransposeSerializedVector(output, 2, 3, 4, 5); - test.AddInput("input", {2, 15, 4}, input); - test.AddInput("image_shape", {2}, std::vector{4, 5}); - test.AddInput("block_shape", {2}, std::vector{1, 5}); - - test.AddOutput("output", {2, 3, 4, 5}, output); - test.Run(); -} - -TEST(Col2ImOpTest, With2Images2channelsNonSquareDilationPadStride4dNCHW) { - OpTester test("Col2Im", 18); - - test.AddAttribute("strides", std::vector{2, 2}); - test.AddAttribute("dilations", std::vector{2, 2}); - test.AddAttribute("pads", std::vector{2, 2, 2, 2}); - - std::vector input{0., 0., 0., 0., 0., 1., 3., 5., 0., 11., 13., 15., 0., 0., 0., 0., - 0., 0., 0., 0., 1., 3., 5., 0., 11., 13., 15., 0., 0., 0., 0., 0., - 0., 0., 0., 0., 0., 21., 23., 25., 0., 31., 33., 35., 0., 0., 0., 0., - 0., 0., 0., 0., 21., 23., 25., 0., 31., 33., 35., 0., 0., 0., 0., 0., - 0., 0., 0., 0., 0., 41., 43., 45., 0., 51., 53., 55., 0., 0., 0., 0., - 0., 0., 0., 0., 41., 43., 45., 0., 51., 53., 55., 0., 0., 0., 0., 0., - 0., 0., 0., 0., 0., 61., 63., 65., 0., 71., 73., 75., 0., 0., 0., 0., - 0., 0., 0., 0., 61., 63., 65., 0., 71., 73., 75., 0., 0., 0., 0., 0.}; - std::vector output{2., 0., 6., 0., 10., - 0., 0., 0., 0., 0., - 22., 0., 26., 0., 30., - 0., 0., 0., 0., 0., - 42., 0., 46., 0., 50., - 0., 0., 0., 0., 0., - 62., 0., 66., 0., 70., - 0., 0., 0., 0., 0., - 82., 0., 86., 0., 90., - 0., 0., 0., 0., 0., - 102., 0., 106., 0., 110., - 0., 0., 0., 0., 0., - 122., 0., 126., 0., 130., - 0., 0., 0., 0., 0., - 142., 0., 146., 0., 150., - 0., 0., 0., 0., 0.}; - test.AddInput("input", {2, 4, 16}, input); - test.AddInput("image_shape", {2}, std::vector{4, 5}); - test.AddInput("block_shape", 
{2}, std::vector{1, 2}); - - test.AddOutput("output", {2, 2, 4, 5}, output); - test.Run(); -} - -TEST(Col2ImOpTest, With3channels4dNCHW) { - OpTester test("Col2Im", 18); - - test.AddAttribute("strides", std::vector{1, 1}); - test.AddAttribute("dilations", std::vector{1, 1}); - test.AddAttribute("pads", std::vector{0, 0, 0, 0}); - - std::vector input(75); - std::vector output(75); - std::iota(output.begin(), output.end(), 1.0f); - input = TransposeSerializedVector(output, 1, 3, 5, 5); - test.AddInput("input", {1, 15, 5}, input); - test.AddInput("image_shape", {2}, std::vector{5, 5}); - test.AddInput("block_shape", {2}, std::vector{1, 5}); - - test.AddOutput("output", {1, 3, 5, 5}, output); - test.Run(); -} - -TEST(Col2ImOpTest, With2Images3channels4dNCHW) { - OpTester test("Col2Im", 18); - - test.AddAttribute("strides", std::vector{1, 1}); - test.AddAttribute("dilations", std::vector{1, 1}); - test.AddAttribute("pads", std::vector{0, 0, 0, 0}); - - std::vector input(150); - std::vector output(150); - std::iota(output.begin(), output.end(), 1.0f); - input = TransposeSerializedVector(output, 2, 3, 5, 5); - test.AddInput("input", {2, 15, 5}, input); - test.AddInput("image_shape", {2}, std::vector{5, 5}); - test.AddInput("block_shape", {2}, std::vector{1, 5}); - - test.AddOutput("output", {2, 3, 5, 5}, output); - test.Run(); -} - -TEST(Col2ImOpTest, Simple5dNCHWD) { - OpTester test("Col2Im", 18); - - test.AddAttribute("strides", std::vector{1, 1, 1}); - test.AddAttribute("dilations", std::vector{1, 1, 1}); - test.AddAttribute("pads", std::vector{0, 0, 0, 0, 0, 0}); - - std::vector input(25); - std::vector output(25); - std::iota(output.begin(), output.end(), 1.0f); - input = TransposeSerializedVector(output, 1, 1, 5, 5); - test.AddInput("input", {1, 5, 5}, input); - test.AddInput("image_shape", {3}, std::vector{1, 5, 5}); - test.AddInput("block_shape", {3}, std::vector{1, 1, 5}); - test.AddOutput("output", {1, 1, 1, 5, 5}, output); - test.Run(); -} - -} // namespace test -} // namespace onnxruntime From bc25103f3b1ee6e92dc76c147819529f68c442f0 Mon Sep 17 00:00:00 2001 From: Liqun Fu Date: Tue, 24 Jan 2023 12:29:04 -0800 Subject: [PATCH 30/30] add col2im_test.cc back because the main branch is having the same error with ReactNative CI Signed-off-by: Liqun Fu --- .../test/providers/cpu/tensor/col2im_test.cc | 169 ++++++++++++++++++ 1 file changed, 169 insertions(+) create mode 100644 onnxruntime/test/providers/cpu/tensor/col2im_test.cc diff --git a/onnxruntime/test/providers/cpu/tensor/col2im_test.cc b/onnxruntime/test/providers/cpu/tensor/col2im_test.cc new file mode 100644 index 0000000000000..3a4539024e5a9 --- /dev/null +++ b/onnxruntime/test/providers/cpu/tensor/col2im_test.cc @@ -0,0 +1,169 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include +#include "gtest/gtest.h" +#include "test/providers/provider_test_utils.h" + +#include "core/util/math.h" + +namespace onnxruntime { +namespace test { + +namespace { +template +std::vector TransposeSerializedVector(std::vector& input, size_t N, size_t C, size_t H, size_t W) { + size_t input_size = input.size(); + if (input_size == 0) { + throw std::runtime_error("Invalid input"); + } + std::vector trans_vec(input); + + for (size_t n = 0; n < N; ++n) + for (size_t c = 0; c < C; ++c) + for (size_t h = 0; h < H; ++h) + for (size_t w = 0; w < W; ++w) + trans_vec[n * (C * H * W) + c * (H * W) + (h + H * w)] = + input[n * (C * H * W) + c * (H * W) + (w + W * h)]; + + return trans_vec; +} + +} // namespace + +TEST(Col2ImOpTest, Simple4dNCHW) { + OpTester test("Col2Im", 18); + + test.AddAttribute("strides", std::vector{1, 1}); + test.AddAttribute("dilations", std::vector{1, 1}); + test.AddAttribute("pads", std::vector{0, 0, 0, 0}); + + std::vector input(25); + std::vector output(25); + std::iota(output.begin(), output.end(), 1.0f); + + input = TransposeSerializedVector(output, 1, 1, 5, 5); + test.AddInput("input", {1, 5, 5}, input); + test.AddInput("image_shape", {2}, std::vector{5, 5}); + test.AddInput("block_shape", {2}, std::vector{1, 5}); + + test.AddOutput("output", {1, 1, 5, 5}, output); + test.Run(); +} + +TEST(Col2ImOpTest, With2Images3channelsNonSquare4dNCHW) { + OpTester test("Col2Im", 18); + + test.AddAttribute("strides", std::vector{1, 1}); + test.AddAttribute("dilations", std::vector{1, 1}); + test.AddAttribute("pads", std::vector{0, 0, 0, 0}); + + std::vector input(120); + std::vector output(120); + std::iota(output.begin(), output.end(), 1.0f); + input = TransposeSerializedVector(output, 2, 3, 4, 5); + test.AddInput("input", {2, 15, 4}, input); + test.AddInput("image_shape", {2}, std::vector{4, 5}); + test.AddInput("block_shape", {2}, std::vector{1, 5}); + + test.AddOutput("output", {2, 3, 4, 5}, output); + test.Run(); +} + +TEST(Col2ImOpTest, With2Images2channelsNonSquareDilationPadStride4dNCHW) { + OpTester test("Col2Im", 18); + + test.AddAttribute("strides", std::vector{2, 2}); + test.AddAttribute("dilations", std::vector{2, 2}); + test.AddAttribute("pads", std::vector{2, 2, 2, 2}); + + std::vector input{0., 0., 0., 0., 0., 1., 3., 5., 0., 11., 13., 15., 0., 0., 0., 0., + 0., 0., 0., 0., 1., 3., 5., 0., 11., 13., 15., 0., 0., 0., 0., 0., + 0., 0., 0., 0., 0., 21., 23., 25., 0., 31., 33., 35., 0., 0., 0., 0., + 0., 0., 0., 0., 21., 23., 25., 0., 31., 33., 35., 0., 0., 0., 0., 0., + 0., 0., 0., 0., 0., 41., 43., 45., 0., 51., 53., 55., 0., 0., 0., 0., + 0., 0., 0., 0., 41., 43., 45., 0., 51., 53., 55., 0., 0., 0., 0., 0., + 0., 0., 0., 0., 0., 61., 63., 65., 0., 71., 73., 75., 0., 0., 0., 0., + 0., 0., 0., 0., 61., 63., 65., 0., 71., 73., 75., 0., 0., 0., 0., 0.}; + std::vector output{2., 0., 6., 0., 10., + 0., 0., 0., 0., 0., + 22., 0., 26., 0., 30., + 0., 0., 0., 0., 0., + 42., 0., 46., 0., 50., + 0., 0., 0., 0., 0., + 62., 0., 66., 0., 70., + 0., 0., 0., 0., 0., + 82., 0., 86., 0., 90., + 0., 0., 0., 0., 0., + 102., 0., 106., 0., 110., + 0., 0., 0., 0., 0., + 122., 0., 126., 0., 130., + 0., 0., 0., 0., 0., + 142., 0., 146., 0., 150., + 0., 0., 0., 0., 0.}; + test.AddInput("input", {2, 4, 16}, input); + test.AddInput("image_shape", {2}, std::vector{4, 5}); + test.AddInput("block_shape", {2}, std::vector{1, 2}); + + test.AddOutput("output", {2, 2, 4, 5}, output); + test.Run(); +} + +TEST(Col2ImOpTest, With3channels4dNCHW) { + OpTester test("Col2Im", 18); + + 
test.AddAttribute("strides", std::vector{1, 1}); + test.AddAttribute("dilations", std::vector{1, 1}); + test.AddAttribute("pads", std::vector{0, 0, 0, 0}); + + std::vector input(75); + std::vector output(75); + std::iota(output.begin(), output.end(), 1.0f); + input = TransposeSerializedVector(output, 1, 3, 5, 5); + test.AddInput("input", {1, 15, 5}, input); + test.AddInput("image_shape", {2}, std::vector{5, 5}); + test.AddInput("block_shape", {2}, std::vector{1, 5}); + + test.AddOutput("output", {1, 3, 5, 5}, output); + test.Run(); +} + +TEST(Col2ImOpTest, With2Images3channels4dNCHW) { + OpTester test("Col2Im", 18); + + test.AddAttribute("strides", std::vector{1, 1}); + test.AddAttribute("dilations", std::vector{1, 1}); + test.AddAttribute("pads", std::vector{0, 0, 0, 0}); + + std::vector input(150); + std::vector output(150); + std::iota(output.begin(), output.end(), 1.0f); + input = TransposeSerializedVector(output, 2, 3, 5, 5); + test.AddInput("input", {2, 15, 5}, input); + test.AddInput("image_shape", {2}, std::vector{5, 5}); + test.AddInput("block_shape", {2}, std::vector{1, 5}); + + test.AddOutput("output", {2, 3, 5, 5}, output); + test.Run(); +} + +TEST(Col2ImOpTest, Simple5dNCHWD) { + OpTester test("Col2Im", 18); + + test.AddAttribute("strides", std::vector{1, 1, 1}); + test.AddAttribute("dilations", std::vector{1, 1, 1}); + test.AddAttribute("pads", std::vector{0, 0, 0, 0, 0, 0}); + + std::vector input(25); + std::vector output(25); + std::iota(output.begin(), output.end(), 1.0f); + input = TransposeSerializedVector(output, 1, 1, 5, 5); + test.AddInput("input", {1, 5, 5}, input); + test.AddInput("image_shape", {3}, std::vector{1, 5, 5}); + test.AddInput("block_shape", {3}, std::vector{1, 1, 5}); + test.AddOutput("output", {1, 1, 1, 5, 5}, output); + test.Run(); +} + +} // namespace test +} // namespace onnxruntime