From c573cb6cca333c8ed9e02bd78a5068ef6762ce2e Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Mon, 23 Jan 2023 15:13:44 -0500 Subject: [PATCH 01/22] Adding tests --- .../GateImplementationsAVXCommon.hpp | 62 +--- .../cpu_kernels/avx_common/AVX2Concept.hpp | 3 - .../cpu_kernels/avx_common/AVX512Concept.hpp | 4 - .../cpu_kernels/avx_common/AVXConceptType.hpp | 49 +++ .../cpu_kernels/avx_common/AVXGateKernels.hpp | 35 ++ .../gates/cpu_kernels/avx_common/AVXUtil.hpp | 150 +++++--- .../cpu_kernels/avx_common/ApplyCNOT.hpp | 45 ++- .../gates/cpu_kernels/avx_common/ApplyCZ.hpp | 1 + .../cpu_kernels/avx_common/ApplyIsingXX.hpp | 2 +- .../cpu_kernels/avx_common/ApplyIsingYY.hpp | 17 +- .../cpu_kernels/avx_common/ApplyIsingZZ.hpp | 2 - .../cpu_kernels/avx_common/ApplyPauliX.hpp | 1 + .../cpu_kernels/avx_common/ApplyPauliY.hpp | 1 + .../cpu_kernels/avx_common/ApplyPauliZ.hpp | 1 + .../avx_common/ApplyPhaseShift.hpp | 58 +-- .../gates/cpu_kernels/avx_common/ApplyRX.hpp | 1 + .../gates/cpu_kernels/avx_common/ApplyRY.hpp | 1 + .../gates/cpu_kernels/avx_common/ApplyRZ.hpp | 1 + .../gates/cpu_kernels/avx_common/ApplyS.hpp | 18 +- .../cpu_kernels/avx_common/ApplySWAP.hpp | 32 +- .../avx_common/ApplySingleQubitOp.hpp | 1 + .../gates/cpu_kernels/avx_common/ApplyT.hpp | 31 +- .../cpu_kernels/avx_common/Permutation.hpp | 9 +- .../gates/cpu_kernels/avx_common/README.md | 14 + .../avx_common/SingleQubitGateHelper.hpp | 77 ++-- .../avx_common/TwoQubitGateHelper.hpp | 299 +++++++++++---- pennylane_lightning/src/tests/CMakeLists.txt | 4 +- .../src/tests/Test_AVXGateHelpers.cpp | 339 ++++++++++++++++++ .../src/tests/Test_TypeTraits.cpp | 53 +++ pennylane_lightning/src/util/TypeTraits.hpp | 24 ++ 30 files changed, 1046 insertions(+), 289 deletions(-) create mode 100644 pennylane_lightning/src/gates/cpu_kernels/avx_common/AVXConceptType.hpp create mode 100644 pennylane_lightning/src/gates/cpu_kernels/avx_common/AVXGateKernels.hpp create mode 100644 
pennylane_lightning/src/gates/cpu_kernels/avx_common/README.md create mode 100644 pennylane_lightning/src/tests/Test_AVXGateHelpers.cpp create mode 100644 pennylane_lightning/src/tests/Test_TypeTraits.cpp diff --git a/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsAVXCommon.hpp b/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsAVXCommon.hpp index f56b790766..7e4a84befc 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsAVXCommon.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsAVXCommon.hpp @@ -16,33 +16,7 @@ * Defines kernel functions for all AVX */ #pragma once - -// General implementations -#include "Macros.hpp" - -#ifdef PL_USE_AVX2 -#include "avx_common/AVX2Concept.hpp" -#endif -#ifdef PL_USE_AVX512F -#include "avx_common/AVX512Concept.hpp" -#endif -#include "avx_common/ApplyCNOT.hpp" -#include "avx_common/ApplyCZ.hpp" -#include "avx_common/ApplyHadamard.hpp" -#include "avx_common/ApplyIsingXX.hpp" -#include "avx_common/ApplyIsingYY.hpp" -#include "avx_common/ApplyIsingZZ.hpp" -#include "avx_common/ApplyPauliX.hpp" -#include "avx_common/ApplyPauliY.hpp" -#include "avx_common/ApplyPauliZ.hpp" -#include "avx_common/ApplyPhaseShift.hpp" -#include "avx_common/ApplyRX.hpp" -#include "avx_common/ApplyRY.hpp" -#include "avx_common/ApplyRZ.hpp" -#include "avx_common/ApplyS.hpp" -#include "avx_common/ApplySWAP.hpp" -#include "avx_common/ApplySingleQubitOp.hpp" -#include "avx_common/ApplyT.hpp" +#include "avx_common/AVXGateKernels.hpp" #include "avx_common/SingleQubitGateHelper.hpp" #include "avx_common/TwoQubitGateHelper.hpp" @@ -91,7 +65,7 @@ class GateImplementationsAVXCommon std::is_same_v, "Only float and double are supported."); - assert(wires.size() == 1); + PL_ASSERT(wires.size() == 1); auto helper = AVXCommon::SingleQubitGateWithoutParamHelper( &GateImplementationsLM::applyPauliX); @@ -110,7 +84,7 @@ class GateImplementationsAVXCommon std::is_same_v, "Only float and double are supported."); 
- assert(wires.size() == 1); + PL_ASSERT(wires.size() == 1); auto helper = AVXCommon::SingleQubitGateWithoutParamHelper( &GateImplementationsLM::applyPauliY); @@ -129,7 +103,7 @@ class GateImplementationsAVXCommon std::is_same_v, "Only float and double are supported."); - assert(wires.size() == 1); + PL_ASSERT(wires.size() == 1); auto helper = AVXCommon::SingleQubitGateWithoutParamHelper( &GateImplementationsLM::applyPauliZ); @@ -161,7 +135,7 @@ class GateImplementationsAVXCommon std::is_same_v, "Only float and double are supported."); - assert(wires.size() == 1); + PL_ASSERT(wires.size() == 1); auto helper = AVXCommon::SingleQubitGateWithoutParamHelper( &GateImplementationsLM::applyT); helper(arr, num_qubits, wires, inverse); @@ -179,7 +153,7 @@ class GateImplementationsAVXCommon std::is_same_v, "Only float and double are supported."); - assert(wires.size() == 1); + PL_ASSERT(wires.size() == 1); auto helper = AVXCommon::SingleQubitGateWithParamHelper( @@ -198,7 +172,7 @@ class GateImplementationsAVXCommon static_assert(std::is_same_v || std::is_same_v, "Only float and double are supported."); - assert(wires.size() == 1); + PL_ASSERT(wires.size() == 1); auto helper = AVXCommon::SingleQubitGateWithoutParamHelper( &GateImplementationsLM::applyHadamard); @@ -215,7 +189,7 @@ class GateImplementationsAVXCommon static_assert(std::is_same_v || std::is_same_v, "Only float and double are supported."); - assert(wires.size() == 1); + PL_ASSERT(wires.size() == 1); auto helper = AVXCommon::SingleQubitGateWithParamHelper( &GateImplementationsLM::applyRX); @@ -232,7 +206,7 @@ class GateImplementationsAVXCommon static_assert(std::is_same_v || std::is_same_v, "Only float and double are supported."); - assert(wires.size() == 1); + PL_ASSERT(wires.size() == 1); auto helper = AVXCommon::SingleQubitGateWithParamHelper( &GateImplementationsLM::applyRY); @@ -249,7 +223,7 @@ class GateImplementationsAVXCommon static_assert(std::is_same_v || std::is_same_v, "Only float and double are 
supported."); - assert(wires.size() == 1); + PL_ASSERT(wires.size() == 1); auto helper = AVXCommon::SingleQubitGateWithParamHelper( &GateImplementationsLM::applyRZ); @@ -260,7 +234,7 @@ class GateImplementationsAVXCommon static void applyRot(std::complex *arr, const size_t num_qubits, const std::vector &wires, bool inverse, ParamT phi, ParamT theta, ParamT omega) { - assert(wires.size() == 1); + PL_ASSERT(wires.size() == 1); const auto rotMat = (inverse) ? Gates::getRot(-omega, -theta, -phi) @@ -282,7 +256,7 @@ class GateImplementationsAVXCommon std::is_same_v, "Only float and double are supported."); - assert(wires.size() == 2); + PL_ASSERT(wires.size() == 2); const AVXCommon::TwoQubitGateWithoutParamHelper gate_helper( &GateImplementationsLM::applyCZ); @@ -302,7 +276,7 @@ class GateImplementationsAVXCommon std::is_same_v, "Only float and double are supported."); - assert(wires.size() == 2); + PL_ASSERT(wires.size() == 2); const AVXCommon::TwoQubitGateWithoutParamHelper gate_helper(&GateImplementationsLM::applySWAP); @@ -314,7 +288,7 @@ class GateImplementationsAVXCommon static void applyCNOT(std::complex *arr, const size_t num_qubits, const std::vector &wires, [[maybe_unused]] bool inverse) { - assert(wires.size() == 2); + PL_ASSERT(wires.size() == 2); using ApplyCNOTAVX = AVXCommon::ApplyCNOT, "Only float and double are supported."); - assert(wires.size() == 2); + PL_ASSERT(wires.size() == 2); const AVXCommon::TwoQubitGateWithoutParamHelper gate_helper(&GateImplementationsLM::applyCNOT); @@ -340,7 +314,7 @@ class GateImplementationsAVXCommon const size_t num_qubits, const std::vector &wires, [[maybe_unused]] bool inverse, ParamT angle) { - assert(wires.size() == 2); + PL_ASSERT(wires.size() == 2); using ApplyIsingXXAVX = AVXCommon::ApplyIsingXX &wires, [[maybe_unused]] bool inverse, ParamT angle) { - assert(wires.size() == 2); + PL_ASSERT(wires.size() == 2); using ApplyIsingYYAVX = AVXCommon::ApplyIsingYY, "Only float and double are supported."); - 
assert(wires.size() == 2); + PL_ASSERT(wires.size() == 2); const AVXCommon::TwoQubitGateWithParamHelper gate_helper( diff --git a/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVX2Concept.hpp b/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVX2Concept.hpp index 6ea8f9534f..66ad8bb2b2 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVX2Concept.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVX2Concept.hpp @@ -18,7 +18,6 @@ #pragma once #include "AVXUtil.hpp" #include "BitUtil.hpp" -#include "Macros.hpp" #include "Util.hpp" #include @@ -110,6 +109,4 @@ template struct AVX2Concept { } } }; -template <> struct AVXConcept { using Type = AVX2Concept; }; -template <> struct AVXConcept { using Type = AVX2Concept; }; } // namespace Pennylane::Gates::AVXCommon diff --git a/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVX512Concept.hpp b/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVX512Concept.hpp index e0176a81f3..d589a9c35c 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVX512Concept.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVX512Concept.hpp @@ -111,8 +111,4 @@ template struct AVX512Concept { } }; -template <> struct AVXConcept { using Type = AVX512Concept; }; -template <> struct AVXConcept { - using Type = AVX512Concept; -}; } // namespace Pennylane::Gates::AVXCommon diff --git a/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVXConceptType.hpp b/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVXConceptType.hpp new file mode 100644 index 0000000000..86583309e6 --- /dev/null +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVXConceptType.hpp @@ -0,0 +1,49 @@ +// Copyright 2023 Xanadu Quantum Technologies Inc. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +/** + * @file + * Defines AVXConcept types + */ +#pragma once + +#include "Macros.hpp" + +#ifdef PL_USE_AVX2 +#include "AVX2Concept.hpp" +#endif + +#ifdef PL_USE_AVX512F +#include "AVX512Concept.hpp" +#endif + +namespace Pennylane::Gates::AVXCommon { + +template struct AVXConcept; + +#ifdef PL_USE_AVX2 +template <> struct AVXConcept { using Type = AVX2Concept; }; +template <> struct AVXConcept { using Type = AVX2Concept; }; +#endif + +#ifdef PL_USE_AVX512F +template <> struct AVXConcept { using Type = AVX512Concept; }; +template <> struct AVXConcept { + using Type = AVX512Concept; +}; +#endif + +template +using AVXConceptType = typename AVXConcept::Type; + +} // namespace Pennylane::Gates::AVXCommon diff --git a/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVXGateKernels.hpp b/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVXGateKernels.hpp new file mode 100644 index 0000000000..087ec30115 --- /dev/null +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVXGateKernels.hpp @@ -0,0 +1,35 @@ +// Copyright 2023 Xanadu Quantum Technologies Inc. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +/** + * @file + * Include all AVX gate implementations + */ +#pragma once +#include "ApplyCNOT.hpp" +#include "ApplyCZ.hpp" +#include "ApplyHadamard.hpp" +#include "ApplyIsingXX.hpp" +#include "ApplyIsingYY.hpp" +#include "ApplyIsingZZ.hpp" +#include "ApplyPauliX.hpp" +#include "ApplyPauliY.hpp" +#include "ApplyPauliZ.hpp" +#include "ApplyPhaseShift.hpp" +#include "ApplyRX.hpp" +#include "ApplyRY.hpp" +#include "ApplyRZ.hpp" +#include "ApplyS.hpp" +#include "ApplySWAP.hpp" +#include "ApplySingleQubitOp.hpp" +#include "ApplyT.hpp" diff --git a/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVXUtil.hpp b/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVXUtil.hpp index 07cabe0912..27ee57fc63 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVXUtil.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVXUtil.hpp @@ -17,6 +17,7 @@ */ #pragma once #include "BitUtil.hpp" +#include "Error.hpp" #include "Macros.hpp" #include "Util.hpp" @@ -56,60 +57,6 @@ template <> struct AVXIntrinsic { }; #endif -template struct AVXConcept; - -template -using AVXConceptType = typename AVXConcept::Type; - -/** - * @brief @rst - * For a function :math:`f(x)` with binary output, this function create - * an AVX intrinsic floating-point type with values :math:`(-1)^{f(x)}` - * where :math:`x` is index of an array (viewed as a complex-valued array). - * @endrst - * - * @rst - * For example, when :math:`f(x) = x % 2`, this returns a packed array - * with values [1, 1, -1, -1, 1, 1, -1, -1]. Note that each value is repeated - * twice as it applies to the both real and imaginary parts. This function is - * used e.g. in CZ gate. 
- * @endrst - * - * @tparam PrecisionT Floating point precision type - * @tparam packed_size Number of packed values for a AVX intrinsic type - * @tparam Func Type of a function - * @param func Binary output function - */ -template -auto toParity(Func &&func) -> AVXIntrinsicType { - std::array data = {}; - for (size_t idx = 0; idx < packed_size / 2; idx++) { - data[2 * idx + 0] = static_cast(1.0) - - 2 * static_cast(func(idx)); - data[2 * idx + 1] = static_cast(1.0) - - 2 * static_cast(func(idx)); - } - return AVXConceptType::loadu(data.data()); -} - -/** - * @brief Repeat the value of the function twice. - * - * As we treat a complex number as two real numbers, this helps when we - * multiply function outcomes to a AVX intrinsic type. - */ -template -auto setValueOneTwo(Func &&func) -> AVXIntrinsicType { - std::array data = { - 0, - }; - for (size_t idx = 0; idx < packed_size / 2; idx++) { - data[2 * idx + 0] = static_cast(func(idx)); - data[2 * idx + 1] = static_cast(func(idx)); - } - return AVXConceptType::loadu(data.data()); -} - /** * @brief one or minus one parity for reverse wire in packed data. 
*/ @@ -133,7 +80,7 @@ template <> constexpr auto internalParity(size_t rev_wire) -> __m256 { template <> constexpr auto internalParity([[maybe_unused]] size_t rev_wire) -> __m256d { - assert(rev_wire == 0); + PL_ASSERT(rev_wire == 0); // When Z is applied to the 0th qubit return __m256d{1.0, 1.0, -1.0, -1.0}; } @@ -265,6 +212,53 @@ template struct InternalWires { template constexpr auto internal_wires_v = InternalWires::value; +template struct Set; +#ifdef PL_USE_AVX2 +template <> struct Set { + constexpr static auto create(const std::array &arr) + -> AVXIntrinsicType { + // NOLINTBEGIN(readability-magic-numbers) + return __m256{arr[0], arr[1], arr[2], arr[3], + arr[4], arr[5], arr[6], arr[7]}; + // NOLINTEND(readability-magic-numbers) + } +}; +template <> struct Set { + constexpr static auto create(const std::array &arr) + -> AVXIntrinsicType { + // NOLINTBEGIN(readability-magic-numbers) + return __m256d{arr[0], arr[1], arr[2], arr[3]}; + // NOLINTEND(readability-magic-numbers) + } +}; +#endif +#ifdef PL_USE_AVX512F +template <> struct Set { + constexpr static auto create(const std::array &arr) + -> AVXIntrinsicType { + // NOLINTBEGIN(readability-magic-numbers) + return __m512{arr[0], arr[1], arr[2], arr[3], arr[4], arr[5], + arr[6], arr[7], arr[8], arr[9], arr[10], arr[11], + arr[12], arr[13], arr[14], arr[15]}; + // NOLINTEND(readability-magic-numbers) + } +}; +template <> struct Set { + constexpr static auto create(const std::array &arr) + -> AVXIntrinsicType { + // NOLINTBEGIN(readability-magic-numbers) + return __m512d{arr[0], arr[1], arr[2], arr[3], + arr[4], arr[5], arr[6], arr[7]}; + // NOLINTEND(readability-magic-numbers) + } +}; +#endif +template +constexpr auto set(const std::array &arr) + -> AVXIntrinsicType { + return Set::create(arr); +} + // clang-format off #ifdef PL_USE_AVX2 constexpr __m256i setr256i(int32_t e0, int32_t e1, int32_t e2, int32_t e3, @@ -302,4 +296,50 @@ constexpr __m512i setr512i(int64_t e0, int64_t e1, int64_t e2, int64_t e3, 
#endif // clang-format on +/** + * @brief @rst + * For a function :math:`f(x)` with binary output, this function creates + * an AVX intrinsic floating-point type with values :math:`(-1)^{f(x)}` + * where :math:`x` is the index into an array (viewed as a complex-valued array). + * @endrst + * + * @rst + * For example, when :math:`f(x) = x % 2`, this returns a packed array + * with values [1, 1, -1, -1, 1, 1, -1, -1]. Note that each value is repeated + * twice as it applies to both the real and imaginary parts. This function is + * used e.g. in CZ gate. + * @endrst + * + * @tparam PrecisionT Floating point precision type + * @tparam packed_size Number of packed values for an AVX intrinsic type + * @tparam Func Type of a function + * @param func Binary output function + */ +template +auto toParity(Func &&func) -> AVXIntrinsicType { + std::array data{}; + for (size_t idx = 0; idx < packed_size / 2; idx++) { + data[2 * idx + 0] = static_cast(1.0) - + 2 * static_cast(func(idx)); + data[2 * idx + 1] = static_cast(1.0) - + 2 * static_cast(func(idx)); + } + return set(data); +} + +/** + * @brief Repeat the value of the function twice. + * + * As we treat a complex number as two real numbers, this helps when we + * multiply function outcomes to an AVX intrinsic type. 
+ */ +template +auto setValueOneTwo(Func &&func) -> AVXIntrinsicType { + std::array data{}; + for (size_t idx = 0; idx < packed_size / 2; idx++) { + data[2 * idx + 0] = static_cast(func(idx)); + data[2 * idx + 1] = static_cast(func(idx)); + } + return set(data); +} } // namespace Pennylane::Gates::AVXCommon diff --git a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyCNOT.hpp b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyCNOT.hpp index 0f53c5defd..ab270c29f1 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyCNOT.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyCNOT.hpp @@ -16,6 +16,7 @@ * Defines CNOT gate */ #pragma once +#include "AVXConceptType.hpp" #include "AVXUtil.hpp" #include "BitUtil.hpp" #include "Blender.hpp" @@ -37,11 +38,9 @@ template struct ApplyCNOT { constexpr static auto packed_size_ = packed_size; constexpr static bool symmetric = false; - template - static constexpr auto permutationInternalInternal() { - std::array perm = { - 0, - }; + template + static consteval auto applyInternalInternalPermuation() { + std::array perm{}; for (size_t k = 0; k < packed_size / 2; k++) { if ((k >> control) & 1U) { // if control bit is 1 @@ -60,7 +59,7 @@ template struct ApplyCNOT { size_t num_qubits, [[maybe_unused]] bool inverse) { constexpr static auto perm = - permutationInternalInternal(); + applyInternalInternalPermuation(); for (size_t n = 0; n < exp2(num_qubits); n += packed_size / 2) { const auto v = PrecisionAVXConcept::load(arr + n); @@ -68,14 +67,16 @@ template struct ApplyCNOT { } } - template static constexpr auto maskInternalExternal() { - std::array mask = { - false, - }; + template + static consteval auto applyInternalExternalMask() { + std::array mask{}; for (size_t k = 0; k < packed_size / 2; k++) { if ((k >> control) & 1U) { mask[2 * k + 0] = true; mask[2 * k + 1] = true; + } else { + mask[2 * k + 0] = false; + mask[2 * k + 1] = false; } } return compileMask(mask); @@ -99,7 +100,7 
@@ template struct ApplyCNOT { const size_t max_wire_parity = fillTrailingOnes(rev_wire_max); const size_t max_wire_parity_inv = fillLeadingOnes(rev_wire_max + 1); - constexpr static auto mask = maskInternalExternal(); + constexpr static auto mask = applyInternalExternalMask(); for (size_t k = 0; k < exp2(num_qubits - 1); k += packed_size / 2) { const size_t i0 = @@ -114,11 +115,12 @@ template struct ApplyCNOT { } } + /** + * @brief Permutation that flip the target bit. + */ template - static constexpr auto permutationExternalInternal() { - std::array perm = { - 0, - }; + static consteval auto applyExternalInternalPermutation() { + std::array perm{}; for (size_t k = 0; k < packed_size / 2; k++) { perm[2 * k + 0] = 2 * (k ^ (1U << target)) + 0; perm[2 * k + 1] = 2 * (k ^ (1U << target)) + 1; @@ -132,19 +134,16 @@ template struct ApplyCNOT { [[maybe_unused]] bool inverse) { // control qubit is external but target qubit is external // const size_t rev_wire_min = std::min(rev_wire0, rev_wire1); - const size_t rev_wire_max = std::max(control, target); + const size_t control_shift = (static_cast(1U) << control); + const size_t max_wire_parity = fillTrailingOnes(control); + const size_t max_wire_parity_inv = fillLeadingOnes(control + 1); - const size_t max_rev_wire_shift = - (static_cast(1U) << rev_wire_max); - const size_t max_wire_parity = fillTrailingOnes(rev_wire_max); - const size_t max_wire_parity_inv = fillLeadingOnes(rev_wire_max + 1); - - constexpr static auto perm = permutationExternalInternal(); + constexpr static auto perm = applyExternalInternalPermutation(); for (size_t k = 0; k < exp2(num_qubits - 1); k += packed_size / 2) { const size_t i0 = ((k << 1U) & max_wire_parity_inv) | (max_wire_parity & k); - const size_t i1 = i0 | max_rev_wire_shift; + const size_t i1 = i0 | control_shift; const auto v1 = PrecisionAVXConcept::load(arr + i1); PrecisionAVXConcept::store(arr + i1, diff --git a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyCZ.hpp 
b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyCZ.hpp index 0905c5368b..f8476f209d 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyCZ.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyCZ.hpp @@ -16,6 +16,7 @@ * Defines CZ gate */ #pragma once +#include "AVXConceptType.hpp" #include "AVXUtil.hpp" #include "BitUtil.hpp" #include "Util.hpp" diff --git a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyIsingXX.hpp b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyIsingXX.hpp index e6d76bca5c..191922225e 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyIsingXX.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyIsingXX.hpp @@ -36,7 +36,7 @@ template struct ApplyIsingXX { constexpr static bool symmetric = true; template - static constexpr auto permutationInternalInternal() { + static consteval auto permutationInternalInternal() { std::array perm = { 0, }; diff --git a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyIsingYY.hpp b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyIsingYY.hpp index 119e7db775..dc6e934531 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyIsingYY.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyIsingYY.hpp @@ -36,7 +36,7 @@ template struct ApplyIsingYY { constexpr static bool symmetric = true; template - static constexpr auto permutationInternalInternal() { + static consteval auto permutationInternalInternal() { std::array perm = { 0, }; @@ -59,6 +59,9 @@ template struct ApplyIsingYY { const auto real_cos = set1(std::cos(angle / 2)); + + // Imaginary sin factor. 
It is -sin(phi/2) for 01 and 10, sin(phi/2) + // otherwise const auto imag_sin = imagFactor(isin) * toParity([](size_t n) { @@ -148,8 +151,8 @@ template struct ApplyIsingYY { const auto cos_factor = set1(std::cos(angle / 2)); - const auto p_isin_factor = imagFactor(isin); - const auto m_isin_factor = imagFactor(-isin); + const auto isin_factor_p = imagFactor(isin); + const auto isin_factor_m = imagFactor(-isin); constexpr static auto perm = compilePermutation( swapRealImag(identity())); @@ -167,16 +170,16 @@ template struct ApplyIsingYY { const auto v11 = PrecisionAVXConcept::load(arr + i11); // 11 const auto prod_cos00 = cos_factor * v00; - const auto prod_isin00 = m_isin_factor * permute(v11); + const auto prod_isin00 = isin_factor_m * permute(v11); const auto prod_cos01 = cos_factor * v01; - const auto prod_isin01 = p_isin_factor * permute(v10); + const auto prod_isin01 = isin_factor_p * permute(v10); const auto prod_cos10 = cos_factor * v10; - const auto prod_isin10 = p_isin_factor * permute(v01); + const auto prod_isin10 = isin_factor_p * permute(v01); const auto prod_cos11 = cos_factor * v11; - const auto prod_isin11 = m_isin_factor * permute(v00); + const auto prod_isin11 = isin_factor_m * permute(v00); PrecisionAVXConcept::store(arr + i00, prod_cos00 + prod_isin00); PrecisionAVXConcept::store(arr + i01, prod_cos01 + prod_isin01); diff --git a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyIsingZZ.hpp b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyIsingZZ.hpp index dc470943fc..b9241de731 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyIsingZZ.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyIsingZZ.hpp @@ -41,8 +41,6 @@ template struct ApplyIsingZZ { static void applyInternalInternal(std::complex *arr, size_t num_qubits, bool inverse, ParamT angle) { - // This function is allowed for AVX512 and AVX2 with float - const auto isin = inverse ? 
std::sin(angle / 2) : -std::sin(angle / 2); const auto parity = toParity([=](size_t idx) { return ((idx >> rev_wire0) & 1U) ^ ((idx >> rev_wire1) & 1U); diff --git a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyPauliX.hpp b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyPauliX.hpp index f78e62f48a..fb14058325 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyPauliX.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyPauliX.hpp @@ -16,6 +16,7 @@ * Defines PauliX gate */ #pragma once +#include "AVXConceptType.hpp" #include "AVXUtil.hpp" #include "BitUtil.hpp" #include "Permutation.hpp" diff --git a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyPauliY.hpp b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyPauliY.hpp index 3292a05fbd..01805d90e5 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyPauliY.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyPauliY.hpp @@ -16,6 +16,7 @@ * Defines PauliY gate */ #pragma once +#include "AVXConceptType.hpp" #include "AVXUtil.hpp" #include "BitUtil.hpp" #include "Permutation.hpp" diff --git a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyPauliZ.hpp b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyPauliZ.hpp index c97e237fb1..babee73e8e 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyPauliZ.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyPauliZ.hpp @@ -16,6 +16,7 @@ * Defines PauliZ gate */ #pragma once +#include "AVXConceptType.hpp" #include "AVXUtil.hpp" #include "BitUtil.hpp" #include "Util.hpp" diff --git a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyPhaseShift.hpp b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyPhaseShift.hpp index 00512155a6..771d1c5c82 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyPhaseShift.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyPhaseShift.hpp 
@@ -16,6 +16,7 @@ * Defines PhaseShift gate */ #pragma once +#include "AVXConceptType.hpp" #include "AVXUtil.hpp" #include "BitUtil.hpp" #include "Permutation.hpp" @@ -33,10 +34,13 @@ template struct ApplyPhaseShift { constexpr static size_t packed_size_ = packed_size; - static constexpr auto createPermutation(size_t rev_wire) { - std::array perm = { - 0, - }; + /** + * @brief Permutation for applying `i` if a bit is 1 + * + * FIXME: clang++-12 currently does not accept consteval here. + */ + static constexpr auto applyInternalPermutation(size_t rev_wire) { + std::array perm{}; for (size_t n = 0; n < packed_size / 2; n++) { if (((n >> rev_wire) & 1U) == 0) { @@ -51,50 +55,50 @@ template struct ApplyPhaseShift { return Permutation::compilePermutation(perm); } - static auto cosFactor(size_t rev_wire, PrecisionT cos) + /** + * @brief Factor for applying [1, 1, cos(angle), cos(angle)] + */ + static auto cosFactor(size_t rev_wire, PrecisionT angle) -> AVXIntrinsicType { - std::array data = { - 0, - }; + std::array arr{}; for (size_t n = 0; n < packed_size / 2; n++) { if (((n >> rev_wire) & 1U) == 0) { - data[2 * n + 0] = 1.0; - data[2 * n + 1] = 1.0; + arr[2 * n + 0] = 1.0; + arr[2 * n + 1] = 1.0; } else { - data[2 * n + 0] = cos; - data[2 * n + 1] = cos; + arr[2 * n + 0] = std::cos(angle); + arr[2 * n + 1] = std::cos(angle); } } - return PrecisionAVXConcept::loadu(data.data()); + return set(arr); } - static auto isinFactor(size_t rev_wire, PrecisionT isin) + /** + * @brief Factor for applying [0, 0, -sin(angle), sin(angle)] + */ + static auto isinFactor(size_t rev_wire, PrecisionT angle) -> AVXIntrinsicType { - std::array data = { - 0, - }; + std::array arr{}; for (size_t n = 0; n < packed_size / 2; n++) { if (((n >> rev_wire) & 1U) == 0) { - data[2 * n + 0] = 0.0; - data[2 * n + 1] = 0.0; + arr[2 * n + 0] = 0.0; + arr[2 * n + 1] = 0.0; } else { - data[2 * n + 0] = -isin; - data[2 * n + 1] = isin; + arr[2 * n + 0] = -std::sin(angle); + arr[2 * n + 1] = std::sin(angle); 
} } - return PrecisionAVXConcept::loadu(data.data()); + return set(arr); } template static void applyInternal(std::complex *arr, const size_t num_qubits, bool inverse, ParamT angle) { - constexpr static auto perm = createPermutation(rev_wire); - const auto cos_factor = - cosFactor(rev_wire, static_cast(cos(angle))); + constexpr static auto perm = applyInternalPermutation(rev_wire); + const auto cos_factor = cosFactor(rev_wire, angle); const auto isin_factor = - isinFactor(rev_wire, static_cast(inverse ? -1.0 : 1.0) * - static_cast(sin(angle))); + isinFactor(rev_wire, (inverse ? -angle : angle)); for (size_t k = 0; k < (1U << num_qubits); k += packed_size / 2) { const auto v = PrecisionAVXConcept::load(arr + k); diff --git a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyRX.hpp b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyRX.hpp index a7d6c47824..60111f8f1d 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyRX.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyRX.hpp @@ -16,6 +16,7 @@ * Defines RX gate */ #pragma once +#include "AVXConceptType.hpp" #include "AVXUtil.hpp" #include "BitUtil.hpp" #include "Permutation.hpp" diff --git a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyRY.hpp b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyRY.hpp index 967a282a7f..2014df0885 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyRY.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyRY.hpp @@ -16,6 +16,7 @@ * Defines RY gate */ #pragma once +#include "AVXConceptType.hpp" #include "AVXUtil.hpp" #include "BitUtil.hpp" #include "Permutation.hpp" diff --git a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyRZ.hpp b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyRZ.hpp index 7c49a2f972..74b84a81ea 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyRZ.hpp +++ 
b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyRZ.hpp @@ -16,6 +16,7 @@ * Defines RZ gate */ #pragma once +#include "AVXConceptType.hpp" #include "AVXUtil.hpp" #include "BitUtil.hpp" #include "Permutation.hpp" diff --git a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyS.hpp b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyS.hpp index 59e9aaa0be..7d8c5f70da 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyS.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyS.hpp @@ -16,6 +16,7 @@ * Defines S gate */ #pragma once +#include "AVXConceptType.hpp" #include "AVXUtil.hpp" #include "BitUtil.hpp" #include "Permutation.hpp" @@ -32,10 +33,13 @@ template struct ApplyS { constexpr static size_t packed_size_ = packed_size; - static constexpr auto createPermutation(size_t rev_wire) { - std::array perm = { - 0, - }; + /** + * @brief Permutation for applying `i` if a bit is 1 + * + * FIXME: clang++-12 currently does not accept consteval here. 
+ */ + static constexpr auto applyInternalPermutation(size_t rev_wire) { + std::array perm{}; for (size_t n = 0; n < packed_size / 2; n++) { if (((n >> rev_wire) & 1U) == 0) { @@ -51,9 +55,7 @@ template struct ApplyS { static auto createFactor(size_t rev_wire, bool inverse) -> AVXIntrinsicType { - std::array data = { - 0, - }; + std::array data{}; for (size_t n = 0; n < packed_size / 2; n++) { if (((n >> rev_wire) & 1U) == 0) { data[2 * n + 0] = 1.0; @@ -74,7 +76,7 @@ template struct ApplyS { template static void applyInternal(std::complex *arr, const size_t num_qubits, bool inverse) { - constexpr static auto perm = createPermutation(rev_wire); + constexpr static auto perm = applyInternalPermutation(rev_wire); const auto factor = createFactor(rev_wire, inverse); for (size_t k = 0; k < (1U << num_qubits); k += packed_size / 2) { diff --git a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplySWAP.hpp b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplySWAP.hpp index f01b510188..5dc0e955d7 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplySWAP.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplySWAP.hpp @@ -16,6 +16,7 @@ * Defines SWAP gate */ #pragma once +#include "AVXConceptType.hpp" #include "AVXUtil.hpp" #include "BitUtil.hpp" #include "Blender.hpp" @@ -36,12 +37,13 @@ template struct ApplySWAP { constexpr static size_t packed_size_ = packed_size; constexpr static bool symmetric = true; + /** + * @brief Permutation that swaps bits in two wires + */ template - constexpr static auto swapPermutation() { + static consteval auto applyInternalInternalPermutation() { const auto identity_perm = Permutation::identity(); - std::array perm = { - 0, - }; + std::array perm{}; for (size_t i = 0; i < packed_size / 2; i++) { // swap rev_wire1 and rev_wire0 bits @@ -58,7 +60,8 @@ template struct ApplySWAP { size_t num_qubits, [[maybe_unused]] bool inverse) { using namespace Permutation; - constexpr static auto perm = 
swapPermutation(); + constexpr static auto perm = + applyInternalInternalPermutation(); for (size_t n = 0; n < exp2(num_qubits); n += packed_size / 2) { const auto v = PrecisionAVXConcept::load(arr + n); @@ -66,10 +69,11 @@ template struct ApplySWAP { } } - template constexpr static auto createMask0() { - std::array m = { - false, - }; + /** + * @brief Setting a mask. Mask is 1 if bits in min_rev_wire is set + */ + template static consteval auto createMask0() { + std::array m{}; for (size_t i = 0; i < packed_size / 2; i++) { if ((i & (1U << min_rev_wire)) != 0) { m[2 * i + 0] = true; @@ -81,10 +85,12 @@ template struct ApplySWAP { } return compileMask(m); } - template constexpr static auto createMask1() { - std::array m = { - false, - }; + + /** + * @brief Setting a mask. Mask is 1 if bits in min_rev_wire is unset + */ + template static consteval auto createMask1() { + std::array m = {}; for (size_t i = 0; i < packed_size / 2; i++) { if ((i & (1U << min_rev_wire)) != 0) { m[2 * i + 0] = false; diff --git a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplySingleQubitOp.hpp b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplySingleQubitOp.hpp index bab9d3d0eb..bd359bddfd 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplySingleQubitOp.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplySingleQubitOp.hpp @@ -16,6 +16,7 @@ * Defines applySingleQubitOp for AVX */ #pragma once +#include "AVXConceptType.hpp" #include "AVXUtil.hpp" #include "BitUtil.hpp" #include "Permutation.hpp" diff --git a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyT.hpp b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyT.hpp index 505932a71a..94b056543c 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyT.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyT.hpp @@ -16,6 +16,7 @@ * Defines T gate */ #pragma once +#include "AVXConceptType.hpp" #include "AVXUtil.hpp" #include "BitUtil.hpp" 
#include "Permutation.hpp" @@ -34,10 +35,13 @@ template struct ApplyT { constexpr static size_t packed_size_ = packed_size; constexpr static auto isqrt2 = Util::INVSQRT2(); - static constexpr auto createPermutation(size_t rev_wire) { - std::array perm = { - 0, - }; + /** + * @brief Permutation for applying `i` if a bit is 1 + * + * FIXME: clang++-12 currently does not accept consteval here. + */ + static constexpr auto applyInternalPermutation(size_t rev_wire) { + std::array perm{}; for (size_t n = 0; n < packed_size / 2; n++) { if (((n >> rev_wire) & 1U) == 0) { @@ -52,11 +56,9 @@ template struct ApplyT { return Permutation::compilePermutation(perm); } - static auto cosFactor(size_t rev_wire) + static auto applyInternalRealFactor(size_t rev_wire) -> AVXIntrinsicType { - std::array data = { - 0, - }; + std::array data{}; for (size_t n = 0; n < packed_size / 2; n++) { if (((n >> rev_wire) & 1U) == 0) { data[2 * n + 0] = 1.0; @@ -69,11 +71,9 @@ template struct ApplyT { return PrecisionAVXConcept::loadu(data.data()); } - static auto isinFactor(size_t rev_wire, bool inverse) + static auto applyInternalImagFactor(size_t rev_wire, bool inverse) -> AVXIntrinsicType { - std::array data = { - 0, - }; + std::array data{}; for (size_t n = 0; n < packed_size / 2; n++) { if (((n >> rev_wire) & 1U) == 0) { data[2 * n + 0] = 0.0; @@ -94,9 +94,10 @@ template struct ApplyT { template static void applyInternal(std::complex *arr, const size_t num_qubits, bool inverse) { - constexpr static auto perm = createPermutation(rev_wire); - const auto cos_factor = cosFactor(rev_wire); - const auto isin_factor = isinFactor(rev_wire, inverse); + constexpr static auto perm = applyInternalPermutation(rev_wire); + + const auto cos_factor = applyInternalRealFactor(rev_wire); + const auto isin_factor = applyInternalImagFactor(rev_wire, inverse); for (size_t k = 0; k < (1U << num_qubits); k += packed_size / 2) { const auto v = PrecisionAVXConcept::load(arr + k); diff --git 
a/pennylane_lightning/src/gates/cpu_kernels/avx_common/Permutation.hpp b/pennylane_lightning/src/gates/cpu_kernels/avx_common/Permutation.hpp index 01148c4c05..b97e8226dc 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/avx_common/Permutation.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/Permutation.hpp @@ -219,7 +219,8 @@ compilePermutation(const std::array &permutation) return {within_lane, getPermutation8x256i(permutation)}; } template <> -constexpr auto compilePermutation(const std::array &permutation) +constexpr auto +compilePermutation(const std::array &permutation) -> CompiledPermutation { bool within_lane = isWithinLane(permutation); @@ -234,7 +235,8 @@ constexpr auto compilePermutation(const std::array &permutation) #ifdef PL_USE_AVX512F // Specializations for AVX512 begin // LCOV_EXCL_START template <> -constexpr auto compilePermutation(const std::array &permutation) +constexpr auto +compilePermutation(const std::array &permutation) -> CompiledPermutation { bool within_lane = isWithinLane(permutation); @@ -245,7 +247,8 @@ constexpr auto compilePermutation(const std::array &permutation) return {within_lane, getPermutation16x512i(permutation)}; } template <> -constexpr auto compilePermutation(const std::array &permutation) +constexpr auto +compilePermutation(const std::array &permutation) -> CompiledPermutation { bool within_lane = isWithinLane(permutation); diff --git a/pennylane_lightning/src/gates/cpu_kernels/avx_common/README.md b/pennylane_lightning/src/gates/cpu_kernels/avx_common/README.md new file mode 100644 index 0000000000..b7ccf9c734 --- /dev/null +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/README.md @@ -0,0 +1,14 @@ +# Implementation of PennyLane-Lightning AVX2/512 kernels + +Each gate operation is implemented in a class with a corresponding name. For example, SWAP operation is implemented in `ApplySwap` class defined in [ApplySwap.cpp](ApplySwap.cpp) file. 
+ +Depending on the wires a gate applies to, we use two (for single-qubit operations), three (for symmetric two-qubit operators), and four (for non-symmetric two-qubit operators) functions to implement each gate. +For single-qubit operations, the function `applyInternal` corresponds to intra-register gate operations and `applyExternal` corresponds to inter-register gate operations. +For two-qubit operations, we have `applyInternalInternal` (both wires act internally), `applyInternalExternal` (control wire acts internally whereas target wire acts externally), `applyExternalInternal` (target wire acts internally whereas control wire acts externally), and `applyExternalExternal` (both wires act externally). + + +In most cases, we implement a gate operation by splitting it into permutations, multiplications, and summations. These operations are translated into intrinsics at compile time using the C++ template mechanism. +Permutations and factors for multiplications are often obtained from functions. Those functions are named by concatenating the name of the function that calls them with `Permutation` or `Factor`. For example, `applyInternalInternalPermutation` returns a permutation that is required for the `applyInternalInternal` function. + + +See [the document](https://docs.pennylane.ai/projects/lightning/en/stable/avx_kernels/implementation.html) for details of the implementation. diff --git a/pennylane_lightning/src/gates/cpu_kernels/avx_common/SingleQubitGateHelper.hpp b/pennylane_lightning/src/gates/cpu_kernels/avx_common/SingleQubitGateHelper.hpp index 27b49176b6..b701565b26 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/avx_common/SingleQubitGateHelper.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/SingleQubitGateHelper.hpp @@ -14,10 +14,19 @@ /** * @file * A helper class for single-qubit gates + * + * Define helper classes for AVX2/512 implementations of single-qubit gates. 
+ * Depending on the wire the gate applies to, one needs to call + * ``applyInternal`` or ``applyExternal`` in classes implementing AVX2/512 + * gates. As ``applyInternal`` takes ``wire`` as a template parameters, we + * instantiates this function for all possible ``wire`` and call the correct one + * in runtime. */ #pragma once #include "BitUtil.hpp" #include "ConstantUtil.hpp" +#include "Error.hpp" +#include "TypeTraits.hpp" #include #include @@ -103,12 +112,17 @@ constexpr auto InternalFunctions() -> decltype(auto) { } // namespace Internal /// @endcond +/** + * @brief A Helper class for single-qubit gates without parameters. + */ template class SingleQubitGateWithoutParamHelper { public: using Precision = typename AVXImpl::Precision; - using FuncType = void (*)(std::complex *, size_t, - const std::vector &, bool); + using ReturnType = + typename Util::FuncReturn::Type; + using FuncType = ReturnType (*)(std::complex *, size_t, + const std::vector &, bool); constexpr static size_t packed_size = AVXImpl::packed_size_; private: @@ -118,9 +132,19 @@ class SingleQubitGateWithoutParamHelper { explicit SingleQubitGateWithoutParamHelper(FuncType fallback_func) : fallback_func_{fallback_func} {} - void operator()(std::complex *arr, const size_t num_qubits, - const std::vector &wires, bool inverse) { - assert(wires.size() == 1); + /** + * @brief This function calls corresponding AVX2/512 by finding the correct + * one based on ``wires``. 
+ * + * @param arr Pointer to a statevector array + * @param num_qubits Number of qubits + * @param wires Wires the gate applies to + * @param inverse Apply the inverse of the gate when true + */ + auto operator()(std::complex *arr, const size_t num_qubits, + const std::vector &wires, bool inverse) const + -> ReturnType { + PL_ASSERT(wires.size() == 1); constexpr static size_t internal_wires = Util::log2PerfectPower(packed_size / 2); @@ -130,26 +154,29 @@ class SingleQubitGateWithoutParamHelper { const size_t rev_wire = num_qubits - wires[0] - 1; if (Util::exp2(num_qubits) < packed_size / 2) { - fallback_func_(arr, num_qubits, wires, inverse); - return; + return fallback_func_(arr, num_qubits, wires, inverse); } if (rev_wire < internal_wires) { auto func = internal_functions[rev_wire]; - (*func)(arr, num_qubits, inverse); - return; + return (*func)(arr, num_qubits, inverse); } - AVXImpl::applyExternal(arr, num_qubits, rev_wire, inverse); + return AVXImpl::applyExternal(arr, num_qubits, rev_wire, inverse); } }; +/** + * @brief A Helper class for single-qubit gates with parameters. + */ template class SingleQubitGateWithParamHelper { public: using Precision = typename AVXImpl::Precision; - using FuncType = void (*)(std::complex *, size_t, - const std::vector &, bool, ParamT); + using ReturnType = typename Util::FuncReturn< + decltype(AVXImpl::template applyExternal)>::Type; + using FuncType = ReturnType (*)(std::complex *, size_t, + const std::vector &, bool, ParamT); constexpr static size_t packed_size = AVXImpl::packed_size_; private: @@ -159,10 +186,20 @@ class SingleQubitGateWithParamHelper { explicit SingleQubitGateWithParamHelper(FuncType fallback_func) : fallback_func_{fallback_func} {} - void operator()(std::complex *arr, const size_t num_qubits, + /** + * @brief This function calls corresponding AVX2/512 by finding the correct + * one based on ``wires``. 
+ * + * @param arr Pointer to a statevector array + * @param num_qubits Number of qubits + * @param wires Wires the gate applies to + * @param inverse Apply the inverse of the gate when true + * @param angle Parameter of the gate + */ + auto operator()(std::complex *arr, const size_t num_qubits, const std::vector &wires, bool inverse, - ParamT angle) { - assert(wires.size() == 1); + ParamT angle) const -> ReturnType { + PL_ASSERT(wires.size() == 1); constexpr static size_t internal_wires = Util::log2PerfectPower(packed_size / 2); @@ -173,18 +210,16 @@ class SingleQubitGateWithParamHelper { // When the size of an array is smaller than the AVX type if (Util::exp2(num_qubits) < packed_size / 2) { - fallback_func_(arr, num_qubits, wires, inverse, angle); - return; + return fallback_func_(arr, num_qubits, wires, inverse, angle); } // The gate applies within a register (packed bytes) if (rev_wire < internal_wires) { auto func = internal_functions[rev_wire]; - (*func)(arr, num_qubits, inverse, angle); - return; + return (*func)(arr, num_qubits, inverse, angle); } - - AVXImpl::applyExternal(arr, num_qubits, rev_wire, inverse, angle); + return AVXImpl::applyExternal(arr, num_qubits, rev_wire, inverse, + angle); } }; } // namespace Pennylane::Gates::AVXCommon diff --git a/pennylane_lightning/src/gates/cpu_kernels/avx_common/TwoQubitGateHelper.hpp b/pennylane_lightning/src/gates/cpu_kernels/avx_common/TwoQubitGateHelper.hpp index e769b79759..4678217547 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/avx_common/TwoQubitGateHelper.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/TwoQubitGateHelper.hpp @@ -14,10 +14,20 @@ /** * @file * A helper class for two-qubit gates + * + * Define helper classes for AVX2/512 implementations of two-qubit gates. 
+ * Depending on the wire the gate applies to, one needs to call one of + * ``applyInternalInternal``, ``applyInternalExternal``, + * ``applyExternalInternal``, and `applyExternalExternal``` in classes + * implementing AVX2/512 gates (see README.md). As those functions takes + * ``control`` and ``target`` wires as a template parameters, we instantiates + * these function for all possible choice of ``wires`` and call the correct one + * in runtime. */ #pragma once #include "BitUtil.hpp" #include "ConstantUtil.hpp" +#include "TypeTraits.hpp" #include #include @@ -51,6 +61,14 @@ struct HasInternalExternalWithParam< T, std::void_t)>> : std::true_type {}; +template +struct HasExternalInternalWithParam : std::false_type {}; + +template +struct HasExternalInternalWithParam< + T, std::void_t)>> + : std::true_type {}; + template struct HasExternalInternalWithoutParam : std::false_type {}; @@ -88,6 +106,13 @@ concept SymmetricTwoQubitGateWithParam = HasInternalExternalWithParam::value && HasExternalExternalWithParam::value; +template +concept AsymmetricTwoQubitGateWithParam = + !T::symmetric && HasInternalInternalWithParam::value && + HasInternalExternalWithParam::value && + HasExternalInternalWithParam::value && + HasExternalExternalWithParam::value; + template concept SymmetricTwoQubitGateWithoutParam = T::symmetric && HasInternalInternalWithoutParam::value && @@ -101,26 +126,16 @@ concept AsymmetricTwoQubitGateWithoutParam = HasExternalInternalWithoutParam::value && HasExternalExternalWithoutParam::value; +template +concept TwoQubitGateWithParam = + SymmetricTwoQubitGateWithParam || AsymmetricTwoQubitGateWithParam; + template concept TwoQubitGateWithoutParam = SymmetricTwoQubitGateWithoutParam || AsymmetricTwoQubitGateWithoutParam; namespace Internal { -template -constexpr auto ExternalInternalFunctions_Iter( - [[maybe_unused]] std::index_sequence dummy) -> decltype(auto) { - return Util::tuple_to_array( - std::tuple{&AVXImpl::template applyExternalInternal...}); -} - 
-template -constexpr auto ExternalInternalFunctions() -> decltype(auto) { - constexpr size_t internal_wires = - Util::log2PerfectPower(AVXImpl::packed_size_ / 2); - return ExternalInternalFunctions_Iter( - std::make_index_sequence()); -} -// Symmetric two qubit gate without param begin +// InternalInternal template constexpr auto InternalInternalFunctions_IterTargets( @@ -138,6 +153,32 @@ constexpr auto InternalInternalFunctions_IterTargets( &AVXImpl::template applyInternalInternal...}; } +template +constexpr auto InternalInternalFunctions_IterTargets( + [[maybe_unused]] std::index_sequence dummy) { + return std::array{&AVXImpl::template applyInternalInternal< + std::min(control, target), std::max(control, target), ParamT>...}; +} + +template +constexpr auto InternalInternalFunctions_IterTargets( + [[maybe_unused]] std::index_sequence dummy) { + return std::array{ + &AVXImpl::template applyInternalInternal...}; +} + +template +constexpr auto InternalInternalFunctions_Iter( + [[maybe_unused]] std::index_sequence dummy) { + constexpr size_t internal_wires = + Util::log2PerfectPower(AVXImpl::packed_size_ / 2); + return Util::tuple_to_array(std::tuple{ + InternalInternalFunctions_IterTargets( + std::make_index_sequence())...}); +} + template constexpr auto InternalInternalFunctions_Iter( [[maybe_unused]] std::index_sequence dummy) { @@ -148,6 +189,14 @@ constexpr auto InternalInternalFunctions_Iter( std::make_index_sequence())...}); } +template +constexpr auto InternalInternalFunctions() -> decltype(auto) { + constexpr size_t internal_wires = + Util::log2PerfectPower(AVXImpl::packed_size_ / 2); + return InternalInternalFunctions_Iter( + std::make_index_sequence()); +} + template constexpr auto InternalInternalFunctions() -> decltype(auto) { constexpr size_t internal_wires = @@ -156,6 +205,37 @@ constexpr auto InternalInternalFunctions() -> decltype(auto) { std::make_index_sequence()); } +// Asymmetric two qubit gate without param begin +template +constexpr auto 
ExternalInternalFunctions_Iter( + [[maybe_unused]] std::index_sequence dummy) -> decltype(auto) { + return Util::tuple_to_array( + std::tuple{&AVXImpl::template applyExternalInternal...}); +} + +template +constexpr auto ExternalInternalFunctions() -> decltype(auto) { + constexpr size_t internal_wires = + Util::log2PerfectPower(AVXImpl::packed_size_ / 2); + return ExternalInternalFunctions_Iter( + std::make_index_sequence()); +} +template +constexpr auto ExternalInternalFunctions_Iter( + [[maybe_unused]] std::index_sequence dummy) -> decltype(auto) { + return Util::tuple_to_array(std::tuple{ + &AVXImpl::template applyExternalInternal...}); +} + +template +constexpr auto ExternalInternalFunctions() -> decltype(auto) { + constexpr size_t internal_wires = + Util::log2PerfectPower(AVXImpl::packed_size_ / 2); + return ExternalInternalFunctions_Iter( + std::make_index_sequence()); +} + template constexpr auto InternalExternalFunctions_Iter( [[maybe_unused]] std::index_sequence dummy) -> decltype(auto) { @@ -169,6 +249,21 @@ constexpr auto InternalExternalFunctions() -> decltype(auto) { return InternalExternalFunctions_Iter( std::make_index_sequence()); } + +template +constexpr auto InternalExternalFunctions_Iter( + [[maybe_unused]] std::index_sequence dummy) -> decltype(auto) { + return std::array{ + &AVXImpl::template applyInternalExternal...}; +} + +template +constexpr auto InternalExternalFunctions() -> decltype(auto) { + constexpr size_t internal_wires = + Util::log2PerfectPower(AVXImpl::packed_size_ / 2); + return InternalExternalFunctions_Iter( + std::make_index_sequence()); +} // Symmetric two qubit gate without param end // Symmetric two qubit gate with param begin template class TwoQubitGateWithParamHelper { static_assert(sizeof(T) == -1, "Only specialized template can be used."); }; +/** + * @brief A helper class for two-qubit gate without parameters. 
+ */ template requires TwoQubitGateWithoutParam class TwoQubitGateWithoutParamHelper { public: using Precision = typename AVXImpl::Precision; - using FuncType = void (*)(std::complex *, size_t, - const std::vector &, bool); + using ReturnType = typename Util::FuncReturn< + decltype(AVXImpl::applyExternalExternal)>::Type; + using FuncType = ReturnType (*)(std::complex *, size_t, + const std::vector &, bool); constexpr static size_t packed_size = AVXImpl::packed_size_; private: @@ -237,10 +337,19 @@ class TwoQubitGateWithoutParamHelper { explicit TwoQubitGateWithoutParamHelper(FuncType fallback_func) : fallback_func_{fallback_func} {} - void operator()(std::complex *arr, const size_t num_qubits, - const std::vector &wires, bool inverse) - const requires SymmetricTwoQubitGateWithoutParam { - assert(wires.size() == 2); + /** + * @brief A specialized function for symmetric two-qubit gates (control and + * target wires are symmetric). + * + * @param arr Pointer to a statevector array + * @param num_qubits Number of qubits + * @param wires Wires the gate applies to + * @param inverse Apply the inverse of the gate when true + */ + auto operator()(std::complex *arr, const size_t num_qubits, + const std::vector &wires, bool inverse) const + -> ReturnType requires SymmetricTwoQubitGateWithoutParam { + PL_ASSERT(wires.size() == 2); constexpr static size_t internal_wires = Util::log2PerfectPower(packed_size / 2); @@ -254,33 +363,38 @@ class TwoQubitGateWithoutParamHelper { const size_t rev_wire1 = num_qubits - wires[0] - 1; if (Util::exp2(num_qubits) < packed_size / 2) { - fallback_func_(arr, num_qubits, wires, inverse); - return; + return fallback_func_(arr, num_qubits, wires, inverse); } - if (rev_wire0 < internal_wires && rev_wire1 < internal_wires) { + if ((rev_wire0 < internal_wires) && (rev_wire1 < internal_wires)) { auto func = internal_internal_functions[rev_wire0][rev_wire1]; - (*func)(arr, num_qubits, inverse); - return; + return (*func)(arr, num_qubits, inverse); } 
const auto min_rev_wire = std::min(rev_wire0, rev_wire1); const auto max_rev_wire = std::max(rev_wire0, rev_wire1); if (min_rev_wire < internal_wires) { - (*internal_external_functions[min_rev_wire])(arr, num_qubits, - max_rev_wire, inverse); - return; + return (*internal_external_functions[min_rev_wire])( + arr, num_qubits, max_rev_wire, inverse); } - AVXImpl::applyExternalExternal(arr, num_qubits, rev_wire0, rev_wire1, - inverse); + return AVXImpl::applyExternalExternal(arr, num_qubits, rev_wire0, + rev_wire1, inverse); } - void operator()(std::complex *arr, const size_t num_qubits, - const std::vector &wires, bool inverse) - const requires AsymmetricTwoQubitGateWithoutParam { - assert(wires.size() == 2); + /** + * @brief A specialized function for asymmetric two-qubit gates. + * + * @param arr Pointer to a statevector array + * @param num_qubits Number of qubits + * @param wires Wires the gate applies to + * @param inverse Apply the inverse of the gate when true + */ + auto operator()(std::complex *arr, const size_t num_qubits, + const std::vector &wires, bool inverse) const + -> ReturnType requires AsymmetricTwoQubitGateWithoutParam { + PL_ASSERT(wires.size() == 2); constexpr static size_t internal_wires = Util::log2PerfectPower(packed_size / 2); @@ -297,40 +411,41 @@ class TwoQubitGateWithoutParamHelper { const size_t control = num_qubits - wires[0] - 1; if (Util::exp2(num_qubits) < packed_size / 2) { - fallback_func_(arr, num_qubits, wires, inverse); - return; + return fallback_func_(arr, num_qubits, wires, inverse); } - if (control < internal_wires && target < internal_wires) { + if ((control < internal_wires) && (target < internal_wires)) { auto func = internal_internal_functions[control][target]; - (*func)(arr, num_qubits, inverse); - return; + return (*func)(arr, num_qubits, inverse); } if (control < internal_wires) { - (*internal_external_functions[control])(arr, num_qubits, target, - inverse); - return; + return 
(*internal_external_functions[control])(arr, num_qubits, + target, inverse); } if (target < internal_wires) { - (*external_internal_functions[target])(arr, num_qubits, control, - inverse); - return; + return (*external_internal_functions[target])(arr, num_qubits, + control, inverse); } - AVXImpl::applyExternalExternal(arr, num_qubits, control, target, - inverse); + return AVXImpl::applyExternalExternal(arr, num_qubits, control, target, + inverse); } }; +/** + * @brief A helper class for two-qubit gate without parameters. + */ template -requires SymmetricTwoQubitGateWithParam +requires TwoQubitGateWithParam class TwoQubitGateWithParamHelper { public: using Precision = typename AVXImpl::Precision; - using FuncType = void (*)(std::complex *, size_t, - const std::vector &, bool, ParamT); + using ReturnType = typename Util::FuncReturn< + decltype(AVXImpl::template applyExternalExternal)>::Type; + using FuncType = ReturnType (*)(std::complex *, size_t, + const std::vector &, bool, ParamT); constexpr static size_t packed_size = AVXImpl::packed_size_; private: @@ -340,10 +455,21 @@ class TwoQubitGateWithParamHelper { explicit TwoQubitGateWithParamHelper(FuncType fallback_func) : fallback_func_{fallback_func} {} - void operator()(std::complex *arr, const size_t num_qubits, + /** + * @brief A specialized function for symmetric two-qubit gates (control and + * target wires are symmetric). 
+ * + * @param arr Pointer to a statevector array + * @param num_qubits Number of qubits + * @param wires Wires the gate applies to + * @param inverse Apply the inverse of the gate when true + * @param angle Parameter of the gate + */ + auto operator()(std::complex *arr, const size_t num_qubits, const std::vector &wires, bool inverse, - ParamT angle) const { - assert(wires.size() == 2); + ParamT angle) const + -> ReturnType requires SymmetricTwoQubitGateWithParam { + PL_ASSERT(wires.size() == 2); constexpr static size_t internal_wires = Util::log2PerfectPower(packed_size / 2); @@ -357,26 +483,75 @@ class TwoQubitGateWithParamHelper { const size_t rev_wire1 = num_qubits - wires[0] - 1; if (Util::exp2(num_qubits) < packed_size / 2) { - fallback_func_(arr, num_qubits, wires, inverse, angle); - return; + return fallback_func_(arr, num_qubits, wires, inverse, angle); } if (rev_wire0 < internal_wires && rev_wire1 < internal_wires) { auto func = internal_internal_functions[rev_wire0][rev_wire1]; - (*func)(arr, num_qubits, inverse, angle); - return; + return (*func)(arr, num_qubits, inverse, angle); } const auto min_rev_wire = std::min(rev_wire0, rev_wire1); const auto max_rev_wire = std::max(rev_wire0, rev_wire1); if (min_rev_wire < internal_wires) { - (*internal_external_functions[min_rev_wire])( + return (*internal_external_functions[min_rev_wire])( arr, num_qubits, max_rev_wire, inverse, angle); - return; } - AVXImpl::applyExternalExternal(arr, num_qubits, rev_wire0, rev_wire1, - inverse, angle); + return AVXImpl::applyExternalExternal(arr, num_qubits, rev_wire0, + rev_wire1, inverse, angle); + } + + /** + * @brief A specialized function for asymmetric two-qubit gates. 
+ * + * @param arr Pointer to a statevector array + * @param num_qubits Number of qubits + * @param wires Wires the gate applies to + * @param inverse Apply the inverse of the gate when true + * @param angle Parameter of the gate + */ + auto operator()(std::complex *arr, const size_t num_qubits, + const std::vector &wires, bool inverse, + ParamT angle) const + -> ReturnType requires AsymmetricTwoQubitGateWithParam { + PL_ASSERT(wires.size() == 2); + + constexpr static size_t internal_wires = + Util::log2PerfectPower(packed_size / 2); + constexpr static auto internal_internal_functions = + Internal::InternalInternalFunctions(); + + constexpr static auto internal_external_functions = + Internal::InternalExternalFunctions(); + + constexpr static auto external_internal_functions = + Internal::ExternalInternalFunctions(); + + const size_t target = num_qubits - wires[1] - 1; + const size_t control = num_qubits - wires[0] - 1; + + if (Util::exp2(num_qubits) < packed_size / 2) { + return fallback_func_(arr, num_qubits, wires, inverse, angle); + } + + if ((control < internal_wires) && (target < internal_wires)) { + auto func = internal_internal_functions[control][target]; + return (*func)(arr, num_qubits, inverse, angle); + } + + if (control < internal_wires) { + return (*internal_external_functions[control])( + arr, num_qubits, target, inverse, angle); + } + + if (target < internal_wires) { + return (*external_internal_functions[target])( + arr, num_qubits, control, inverse, angle); + } + + return AVXImpl::applyExternalExternal(arr, num_qubits, control, target, + inverse, angle); } }; } // namespace Pennylane::Gates::AVXCommon diff --git a/pennylane_lightning/src/tests/CMakeLists.txt b/pennylane_lightning/src/tests/CMakeLists.txt index b33b5122f3..8cb1f251cb 100644 --- a/pennylane_lightning/src/tests/CMakeLists.txt +++ b/pennylane_lightning/src/tests/CMakeLists.txt @@ -69,6 +69,7 @@ target_link_libraries(compile_time_tests lightning_compile_options lightning_gat 
set(TEST_SOURCES CreateAllWires.cpp Test_AdjDiff.cpp Test_AlgUtil.cpp + Test_AVXGateHelpers.cpp #Test_Bindings.cpp Test_CompilerSupport.cpp Test_DynamicDispatcher.cpp @@ -92,6 +93,7 @@ set(TEST_SOURCES CreateAllWires.cpp Test_StateVecAdjDiff.cpp Test_StateVectorManagedCPU.cpp Test_StateVectorRawCPU.cpp + Test_TypeTraits.cpp Test_Util.cpp) add_executable(runner ${TEST_SOURCES}) @@ -104,4 +106,4 @@ catch_discover_tests(runner) # are horrible if compile time constants are not well defined. add_dependencies(runner compile_time_tests) -install(TARGETS runner DESTINATION bin) \ No newline at end of file +install(TARGETS runner DESTINATION bin) diff --git a/pennylane_lightning/src/tests/Test_AVXGateHelpers.cpp b/pennylane_lightning/src/tests/Test_AVXGateHelpers.cpp new file mode 100644 index 0000000000..3074a97906 --- /dev/null +++ b/pennylane_lightning/src/tests/Test_AVXGateHelpers.cpp @@ -0,0 +1,339 @@ +#include "cpu_kernels/avx_common/SingleQubitGateHelper.hpp" +#include "cpu_kernels/avx_common/TwoQubitGateHelper.hpp" + +#include + +using namespace Pennylane::Gates::AVXCommon; + +template +struct MockSingleQubitGateWithoutParam { + using Precision = PrecisionT; + constexpr static size_t packed_size_ = packed_size; + + template + static std::tuple + applyInternal(std::complex *arr, const size_t num_qubits, + bool inverse) { + static_cast(arr); + static_cast(num_qubits); + static_cast(inverse); + return {"applyInternal", rev_wire, inverse}; + } + + static std::tuple + applyExternal(std::complex *arr, const size_t num_qubits, + const size_t rev_wire, bool inverse) { + static_cast(arr); + static_cast(num_qubits); + static_cast(rev_wire); + static_cast(inverse); + return {"applyExternal", rev_wire, inverse}; + } +}; + +template +struct MockSingleQubitGateWithParam { + using Precision = PrecisionT; + constexpr static size_t packed_size_ = packed_size; + + template + static std::tuple + applyInternal(std::complex *arr, const size_t num_qubits, + bool inverse, ParamT angle) { 
+ static_cast(arr); + static_cast(num_qubits); + static_cast(inverse); + static_cast(angle); + return {"applyInternal", rev_wire, inverse}; + } + + template + static std::tuple + applyExternal(std::complex *arr, const size_t num_qubits, + const size_t rev_wire, bool inverse, ParamT angle) { + static_cast(arr); + static_cast(num_qubits); + static_cast(rev_wire); + static_cast(inverse); + static_cast(angle); + return {"applyExternal", rev_wire, inverse}; + } +}; + +/** + * @brief Mock class that only `applyExternal` takes a parameter (which is + * wrong). + */ +template +struct MockSingleQubitGateSomethingWrong { + using Precision = PrecisionT; + constexpr static size_t packed_size_ = packed_size; + + template + static std::tuple + applyInternal(std::complex *arr, const size_t num_qubits, + bool inverse) { + static_cast(arr); + static_cast(num_qubits); + static_cast(inverse); + return {"applyInternal", rev_wire, inverse}; + } + + template + static std::tuple + applyExternal(std::complex *arr, const size_t num_qubits, + const size_t rev_wire, bool inverse, ParamT angle) { + static_cast(arr); + static_cast(num_qubits); + static_cast(rev_wire); + static_cast(inverse); + static_cast(angle); + return {"applyExternal", rev_wire, inverse}; + } +}; + +TEMPLATE_TEST_CASE("Test SingleQubitGateHelper template functions", + "[SingleQubitGateHelper]", float, double) { + STATIC_REQUIRE(HasInternalWithoutParam< + MockSingleQubitGateWithoutParam>::value); + STATIC_REQUIRE(!HasInternalWithParam< + MockSingleQubitGateWithoutParam>::value); + STATIC_REQUIRE(HasExternalWithoutParam< + MockSingleQubitGateWithoutParam>::value); + STATIC_REQUIRE(!HasExternalWithParam< + MockSingleQubitGateWithoutParam>::value); + + STATIC_REQUIRE(!HasInternalWithoutParam< + MockSingleQubitGateWithParam>::value); + STATIC_REQUIRE( + HasInternalWithParam>::value); + STATIC_REQUIRE(!HasExternalWithoutParam< + MockSingleQubitGateWithParam>::value); + STATIC_REQUIRE( + HasExternalWithParam>::value); + + 
STATIC_REQUIRE(HasInternalWithoutParam< + MockSingleQubitGateSomethingWrong>::value); + STATIC_REQUIRE(!HasInternalWithParam< + MockSingleQubitGateSomethingWrong>::value); + STATIC_REQUIRE(!HasExternalWithoutParam< + MockSingleQubitGateSomethingWrong>::value); + STATIC_REQUIRE(HasExternalWithParam< + MockSingleQubitGateSomethingWrong>::value); +} + +TEMPLATE_TEST_CASE("Test SingleQubitGateWithoutParamHelper", + "[SingleQubitGateHelper]", float, double) { + auto fallback = [](std::complex *arr, size_t num_qubits, + const std::vector &wires, + bool inverse) -> std::tuple { + static_cast(arr); + return {"fallback", num_qubits - wires[0] - 1, inverse}; + }; + SECTION("Test SingleQubitGateWithoutParamHelper with packed_size = 4") { + constexpr size_t packed_size = 4; + std::vector> arr( + 16, std::complex{0.0, 0.0}); + SingleQubitGateWithoutParamHelper< + MockSingleQubitGateWithoutParam> + func(fallback); + // We pack 4 real numbers -> 2 complex numbers -> single qubit. + // Thus only `rev_wire=0` calls the internal functions. 
+ + for (bool inverse : {false, true}) { + { // num_qubits= 4, wires = {0} -> rev_wires = 3 + const auto res = func(arr.data(), 4, {0}, inverse); + REQUIRE(std::get<0>(res) == std::string("applyExternal")); + REQUIRE(std::get<1>(res) == 3); + REQUIRE(std::get<2>(res) == inverse); + } + { // num_qubits= 4, wires = {1} -> rev_wires = 2 + const auto res = func(arr.data(), 4, {1}, inverse); + REQUIRE(std::get<0>(res) == std::string("applyExternal")); + REQUIRE(std::get<1>(res) == 2); + REQUIRE(std::get<2>(res) == inverse); + } + { // num_qubits= 4, wires = {2} -> rev_wires = 1 + const auto res = func(arr.data(), 4, {2}, inverse); + REQUIRE(std::get<0>(res) == std::string("applyExternal")); + REQUIRE(std::get<1>(res) == 1); + REQUIRE(std::get<2>(res) == inverse); + } + { // num_qubits= 4, wires = {3} -> rev_wires = 0 + const auto res = func(arr.data(), 4, {3}, inverse); + REQUIRE(std::get<0>(res) == std::string("applyInternal")); + REQUIRE(std::get<1>(res) == 0); + REQUIRE(std::get<2>(res) == inverse); + } + { // num_qubits = 1 -> do not call fallback (as a single qubit + // statevector fits into the packed data type) + const auto res = func(arr.data(), 1, {0}, inverse); + REQUIRE(std::get<0>(res) != std::string("fallback")); + REQUIRE(std::get<1>(res) == 0); + REQUIRE(std::get<2>(res) == inverse); + } + } + } + + SECTION("Test SingleQubitGateWithoutParamHelper with packed_size = 8") { + constexpr size_t packed_size = 8; + std::vector> arr( + 16, std::complex{0.0, 0.0}); + SingleQubitGateWithoutParamHelper< + MockSingleQubitGateWithoutParam> + func(fallback); + // We pack 8 real numbers -> 4 complex numbers -> two qubit. + // Thus `rev_wire=0,1` calls the internal functions. 
+ + for (bool inverse : {false, true}) { + { // num_qubits= 4, wires = {0} -> rev_wires = 3 + const auto res = func(arr.data(), 4, {0}, inverse); + REQUIRE(std::get<0>(res) == std::string("applyExternal")); + REQUIRE(std::get<1>(res) == 3); + REQUIRE(std::get<2>(res) == inverse); + } + { // num_qubits= 4, wires = {1} -> rev_wires = 2 + const auto res = func(arr.data(), 4, {1}, inverse); + REQUIRE(std::get<0>(res) == std::string("applyExternal")); + REQUIRE(std::get<1>(res) == 2); + REQUIRE(std::get<2>(res) == inverse); + } + { // num_qubits= 4, wires = {2} -> rev_wires = 1 + const auto res = func(arr.data(), 4, {2}, inverse); + REQUIRE(std::get<0>(res) == std::string("applyInternal")); + REQUIRE(std::get<1>(res) == 1); + REQUIRE(std::get<2>(res) == inverse); + } + { // num_qubits= 4, wires = {3} -> rev_wires = 0 + const auto res = func(arr.data(), 4, {3}, inverse); + REQUIRE(std::get<0>(res) == std::string("applyInternal")); + REQUIRE(std::get<1>(res) == 0); + REQUIRE(std::get<2>(res) == inverse); + } + { // num_qubits = 1 -> call fallback + const auto res = func(arr.data(), 1, {0}, inverse); + REQUIRE(std::get<0>(res) == std::string("fallback")); + REQUIRE(std::get<1>(res) == 0); + REQUIRE(std::get<2>(res) == inverse); + } + { // num_qubits = 2 -> do not call fallback (as two qubits + // statevector == 4 complex numbers fits to the packed data type) + const auto res = func(arr.data(), 2, {0}, inverse); + REQUIRE(std::get<0>(res) != std::string("fallback")); + REQUIRE(std::get<1>(res) == 1); + REQUIRE(std::get<2>(res) == inverse); + } + } + } +} + +TEMPLATE_TEST_CASE("Test SingleQubitGateWithParamHelper", + "[SingleQubitGateHelper]", float, double) { + auto fallback = + [](std::complex *arr, size_t num_qubits, + const std::vector &wires, bool inverse, + TestType angle) -> std::tuple { + static_cast(arr); + static_cast(angle); + return {"fallback", num_qubits - wires[0] - 1, inverse}; + }; + SECTION("Test SingleQubitGateWithoutParamHelper with packed_size = 4") { + 
constexpr size_t packed_size = 4; + std::vector> arr( + 16, std::complex{0.0, 0.0}); + SingleQubitGateWithParamHelper< + MockSingleQubitGateWithParam, TestType> + func(fallback); + // We pack 4 real numbers -> 2 complex numbers -> single qubit. + // Thus only `rev_wire=0` calls the internal functions. + + TestType angle = 0.312; + + for (bool inverse : {false, true}) { + { // num_qubits= 4, wires = {0} -> rev_wires = 3 + const auto res = func(arr.data(), 4, {0}, inverse, angle); + REQUIRE(std::get<0>(res) == std::string("applyExternal")); + REQUIRE(std::get<1>(res) == 3); + REQUIRE(std::get<2>(res) == inverse); + } + { // num_qubits= 4, wires = {1} -> rev_wires = 2 + const auto res = func(arr.data(), 4, {1}, inverse, angle); + REQUIRE(std::get<0>(res) == std::string("applyExternal")); + REQUIRE(std::get<1>(res) == 2); + REQUIRE(std::get<2>(res) == inverse); + } + { // num_qubits= 4, wires = {2} -> rev_wires = 1 + const auto res = func(arr.data(), 4, {2}, inverse, angle); + REQUIRE(std::get<0>(res) == std::string("applyExternal")); + REQUIRE(std::get<1>(res) == 1); + REQUIRE(std::get<2>(res) == inverse); + } + { // num_qubits= 4, wires = {3} -> rev_wires = 0 + const auto res = func(arr.data(), 4, {3}, inverse, angle); + REQUIRE(std::get<0>(res) == std::string("applyInternal")); + REQUIRE(std::get<1>(res) == 0); + REQUIRE(std::get<2>(res) == inverse); + } + { // num_qubits = 1 -> do not call fallback (as a single qubit + // statevector fits into the packed data type) + const auto res = func(arr.data(), 1, {0}, inverse, angle); + REQUIRE(std::get<0>(res) != std::string("fallback")); + REQUIRE(std::get<1>(res) == 0); + REQUIRE(std::get<2>(res) == inverse); + } + } + } + + SECTION("Test SingleQubitGateWithoutParamHelper with packed_size = 8") { + constexpr size_t packed_size = 8; + std::vector> arr( + 16, std::complex{0.0, 0.0}); + SingleQubitGateWithParamHelper< + MockSingleQubitGateWithParam, TestType> + func(fallback); + // We pack 8 real numbers -> 4 complex numbers 
-> two qubit. + // Thus `rev_wire=0,1` calls the internal functions. + + TestType angle = 0.312; + + for (bool inverse : {false, true}) { + { // num_qubits= 4, wires = {0} -> rev_wires = 3 + const auto res = func(arr.data(), 4, {0}, inverse, angle); + REQUIRE(std::get<0>(res) == std::string("applyExternal")); + REQUIRE(std::get<1>(res) == 3); + REQUIRE(std::get<2>(res) == inverse); + } + { // num_qubits= 4, wires = {1} -> rev_wires = 2 + const auto res = func(arr.data(), 4, {1}, inverse, angle); + REQUIRE(std::get<0>(res) == std::string("applyExternal")); + REQUIRE(std::get<1>(res) == 2); + REQUIRE(std::get<2>(res) == inverse); + } + { // num_qubits= 4, wires = {2} -> rev_wires = 1 + const auto res = func(arr.data(), 4, {2}, inverse, angle); + REQUIRE(std::get<0>(res) == std::string("applyInternal")); + REQUIRE(std::get<1>(res) == 1); + REQUIRE(std::get<2>(res) == inverse); + } + { // num_qubits= 4, wires = {3} -> rev_wires = 0 + const auto res = func(arr.data(), 4, {3}, inverse, angle); + REQUIRE(std::get<0>(res) == std::string("applyInternal")); + REQUIRE(std::get<1>(res) == 0); + REQUIRE(std::get<2>(res) == inverse); + } + { // num_qubits = 1 -> call fallback + const auto res = func(arr.data(), 1, {0}, inverse, angle); + REQUIRE(std::get<0>(res) == std::string("fallback")); + REQUIRE(std::get<1>(res) == 0); + REQUIRE(std::get<2>(res) == inverse); + } + { // num_qubits = 2 -> do not call fallback (as two qubits + // statevector == 4 complex numbers fits to the packed data type) + const auto res = func(arr.data(), 2, {0}, inverse, angle); + REQUIRE(std::get<0>(res) != std::string("fallback")); + REQUIRE(std::get<1>(res) == 1); + REQUIRE(std::get<2>(res) == inverse); + } + } + } +} diff --git a/pennylane_lightning/src/tests/Test_TypeTraits.cpp b/pennylane_lightning/src/tests/Test_TypeTraits.cpp new file mode 100644 index 0000000000..144f362ae2 --- /dev/null +++ b/pennylane_lightning/src/tests/Test_TypeTraits.cpp @@ -0,0 +1,53 @@ +#include "TypeTraits.hpp" + 
+#include + +#include +#include +#include + +using namespace Pennylane::Util; + +TEST_CASE("Test remove_complex") { + SECTION("remove_complex returns the floating point if the given type is " + "std::complex") { + STATIC_REQUIRE( + std::is_same_v>, float>); + STATIC_REQUIRE( + std::is_same_v>, double>); + } + SECTION("remove_complex returns the same type if not") { + STATIC_REQUIRE(std::is_same_v, float>); + STATIC_REQUIRE(std::is_same_v, double>); + } +} + +TEST_CASE("Test is_complex") { + SECTION("is_complex returns true if the given type is std::complex") { + STATIC_REQUIRE(is_complex_v>); + STATIC_REQUIRE(is_complex_v>); + } + SECTION("remove_complex returns false if not") { + STATIC_REQUIRE(!is_complex_v); + STATIC_REQUIRE(!is_complex_v); + STATIC_REQUIRE(!is_complex_v); + STATIC_REQUIRE(!is_complex_v); + } +} + +std::pair g(std::tuple); + +TEST_CASE("Test FuncReturn") { + SECTION("FuncReturn returns correctly returns the return type") { + static_assert( + std::is_same_v::Type, + std::pair>); // return type of g is + // std::pair + + using FuncPtr = std::pair (*)(std::tuple); + static_assert( + std::is_same_v::Type, + std::pair>); // return type of g is + // std::pair + } +} diff --git a/pennylane_lightning/src/util/TypeTraits.hpp b/pennylane_lightning/src/util/TypeTraits.hpp index c1372fddea..f834e202ed 100644 --- a/pennylane_lightning/src/util/TypeTraits.hpp +++ b/pennylane_lightning/src/util/TypeTraits.hpp @@ -31,4 +31,28 @@ template struct is_complex : std::false_type {}; template struct is_complex> : std::true_type {}; template constexpr bool is_complex_v = is_complex::value; + +/** + * @brief Function return type + * + * Usage: + * .. code-block::cpp + * + * std::pair g(std::tuple); + * static_assert(std::is_same_v::Type, + * std::pair>); // return type of g is std::pair + * + */ + +template struct FuncReturn { + // When instantiated + static_assert(sizeof(F) == -1, + "The given type is not a function. 
Currently, lambda " + "function is not supported."); +}; +template struct FuncReturn { + using Type = R; +}; +template struct FuncReturn { using Type = R; }; + } // namespace Pennylane::Util From 20a0180b8298469caac4c304e7488ca83e4105c2 Mon Sep 17 00:00:00 2001 From: Dev version update bot Date: Mon, 23 Jan 2023 20:15:14 +0000 Subject: [PATCH 02/22] Auto update version --- pennylane_lightning/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pennylane_lightning/_version.py b/pennylane_lightning/_version.py index 78c22e4cbc..1b5c51eed3 100644 --- a/pennylane_lightning/_version.py +++ b/pennylane_lightning/_version.py @@ -16,4 +16,4 @@ Version number (major.minor.patch[-label]) """ -__version__ = "0.29.0-dev0" +__version__ = "0.29.0-dev1" From feddba9ebab6243407f89fe499b28b9fbb5c1ea8 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Mon, 23 Jan 2023 15:16:44 -0500 Subject: [PATCH 03/22] Fix --- pennylane_lightning/src/tests/Test_TypeTraits.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pennylane_lightning/src/tests/Test_TypeTraits.cpp b/pennylane_lightning/src/tests/Test_TypeTraits.cpp index 144f362ae2..47fcccae30 100644 --- a/pennylane_lightning/src/tests/Test_TypeTraits.cpp +++ b/pennylane_lightning/src/tests/Test_TypeTraits.cpp @@ -39,13 +39,13 @@ std::pair g(std::tuple); TEST_CASE("Test FuncReturn") { SECTION("FuncReturn returns correctly returns the return type") { - static_assert( + STATIC_REQUIRE( std::is_same_v::Type, std::pair>); // return type of g is // std::pair using FuncPtr = std::pair (*)(std::tuple); - static_assert( + STATIC_REQUIRE( std::is_same_v::Type, std::pair>); // return type of g is // std::pair From e136720a9228fa90662920bf9d8aefa2eb2332a9 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Mon, 23 Jan 2023 15:45:51 -0500 Subject: [PATCH 04/22] Update comment --- ..._AVXGateHelpers.cpp => Test_AVXSingleQubitGateHelpers.cpp} | 0 pennylane_lightning/src/util/TypeTraits.hpp | 4 ++-- 2 
files changed, 2 insertions(+), 2 deletions(-) rename pennylane_lightning/src/tests/{Test_AVXGateHelpers.cpp => Test_AVXSingleQubitGateHelpers.cpp} (100%) diff --git a/pennylane_lightning/src/tests/Test_AVXGateHelpers.cpp b/pennylane_lightning/src/tests/Test_AVXSingleQubitGateHelpers.cpp similarity index 100% rename from pennylane_lightning/src/tests/Test_AVXGateHelpers.cpp rename to pennylane_lightning/src/tests/Test_AVXSingleQubitGateHelpers.cpp diff --git a/pennylane_lightning/src/util/TypeTraits.hpp b/pennylane_lightning/src/util/TypeTraits.hpp index f834e202ed..152a5ddb01 100644 --- a/pennylane_lightning/src/util/TypeTraits.hpp +++ b/pennylane_lightning/src/util/TypeTraits.hpp @@ -47,8 +47,8 @@ template constexpr bool is_complex_v = is_complex::value; template struct FuncReturn { // When instantiated static_assert(sizeof(F) == -1, - "The given type is not a function. Currently, lambda " - "function is not supported."); + "The given type is not a function. Currently, lambda" + "functions are not supported."); }; template struct FuncReturn { using Type = R; From 3d8edbf977c6d998d6d85967e1cd83207378396e Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Mon, 23 Jan 2023 20:01:01 -0500 Subject: [PATCH 05/22] Small fiX; format --- .../cpu_kernels/avx_common/ApplyIsingYY.hpp | 11 ++-- pennylane_lightning/src/tests/CMakeLists.txt | 2 +- .../tests/Test_AVXSingleQubitGateHelpers.cpp | 7 ++- .../src/tests/Test_AVXTwoQubitGateHelpers.cpp | 62 +++++++++++++++++++ 4 files changed, 74 insertions(+), 8 deletions(-) create mode 100644 pennylane_lightning/src/tests/Test_AVXTwoQubitGateHelpers.cpp diff --git a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyIsingYY.hpp b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyIsingYY.hpp index dc6e934531..0692218d27 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyIsingYY.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyIsingYY.hpp @@ -151,8 +151,7 @@ template struct 
ApplyIsingYY { const auto cos_factor = set1(std::cos(angle / 2)); - const auto isin_factor_p = imagFactor(isin); - const auto isin_factor_m = imagFactor(-isin); + const auto isin_factor = imagFactor(isin); constexpr static auto perm = compilePermutation( swapRealImag(identity())); @@ -170,16 +169,16 @@ template struct ApplyIsingYY { const auto v11 = PrecisionAVXConcept::load(arr + i11); // 11 const auto prod_cos00 = cos_factor * v00; - const auto prod_isin00 = isin_factor_m * permute(v11); + const auto prod_isin00 = -isin_factor * permute(v11); const auto prod_cos01 = cos_factor * v01; - const auto prod_isin01 = isin_factor_p * permute(v10); + const auto prod_isin01 = isin_factor * permute(v10); const auto prod_cos10 = cos_factor * v10; - const auto prod_isin10 = isin_factor_p * permute(v01); + const auto prod_isin10 = isin_factor * permute(v01); const auto prod_cos11 = cos_factor * v11; - const auto prod_isin11 = isin_factor_m * permute(v00); + const auto prod_isin11 = -isin_factor * permute(v00); PrecisionAVXConcept::store(arr + i00, prod_cos00 + prod_isin00); PrecisionAVXConcept::store(arr + i01, prod_cos01 + prod_isin01); diff --git a/pennylane_lightning/src/tests/CMakeLists.txt b/pennylane_lightning/src/tests/CMakeLists.txt index 8cb1f251cb..b5216c181a 100644 --- a/pennylane_lightning/src/tests/CMakeLists.txt +++ b/pennylane_lightning/src/tests/CMakeLists.txt @@ -69,7 +69,7 @@ target_link_libraries(compile_time_tests lightning_compile_options lightning_gat set(TEST_SOURCES CreateAllWires.cpp Test_AdjDiff.cpp Test_AlgUtil.cpp - Test_AVXGateHelpers.cpp + Test_AVXSingleQubitGateHelpers.cpp #Test_Bindings.cpp Test_CompilerSupport.cpp Test_DynamicDispatcher.cpp diff --git a/pennylane_lightning/src/tests/Test_AVXSingleQubitGateHelpers.cpp b/pennylane_lightning/src/tests/Test_AVXSingleQubitGateHelpers.cpp index 3074a97906..8478f7c763 100644 --- a/pennylane_lightning/src/tests/Test_AVXSingleQubitGateHelpers.cpp +++ 
b/pennylane_lightning/src/tests/Test_AVXSingleQubitGateHelpers.cpp @@ -1,5 +1,4 @@ #include "cpu_kernels/avx_common/SingleQubitGateHelper.hpp" -#include "cpu_kernels/avx_common/TwoQubitGateHelper.hpp" #include @@ -120,6 +119,12 @@ TEMPLATE_TEST_CASE("Test SingleQubitGateHelper template functions", MockSingleQubitGateSomethingWrong>::value); STATIC_REQUIRE(HasExternalWithParam< MockSingleQubitGateSomethingWrong>::value); + + // Test concepts + STATIC_REQUIRE(SingleQubitGateWithoutParam< + MockSingleQubitGateWithoutParam>); + STATIC_REQUIRE( + SingleQubitGateWithParam>); } TEMPLATE_TEST_CASE("Test SingleQubitGateWithoutParamHelper", diff --git a/pennylane_lightning/src/tests/Test_AVXTwoQubitGateHelpers.cpp b/pennylane_lightning/src/tests/Test_AVXTwoQubitGateHelpers.cpp new file mode 100644 index 0000000000..cfa49f883e --- /dev/null +++ b/pennylane_lightning/src/tests/Test_AVXTwoQubitGateHelpers.cpp @@ -0,0 +1,62 @@ +#include "cpu_kernels/avx_common/TwoQubitGateHelper.hpp" + +#include + +using namespace Pennylane::Gates::AVXCommon; + +template +struct MockSymmetricTwoQubitGateWithoutParam { + using Precision = PrecisionT; + constexpr static size_t packed_size_ = packed_size; + constexpr static bool symmetric = true; + + template + static std::tuple + applyInternalInternal(std::complex *arr, + const size_t num_qubits, bool inverse) { + static_cast(arr); + static_cast(num_qubits); + static_cast(inverse); + return {"applyInternal", rev_wire, inverse}; + } + + static std::tuple + applyExternal(std::complex *arr, const size_t num_qubits, + const size_t rev_wire, bool inverse) { + static_cast(arr); + static_cast(num_qubits); + static_cast(rev_wire); + static_cast(inverse); + return {"applyExternal", rev_wire, inverse}; + } +}; + +TEMPLATE_TEST_CASE("Test SingleQubitGateHelper template functions", + "[SingleQubitGateHelper]", float, double) { + STATIC_REQUIRE(HasInternalWithoutParam< + MockSingleQubitGateWithoutParam>::value); + STATIC_REQUIRE(!HasInternalWithParam< + 
MockSingleQubitGateWithoutParam>::value); + STATIC_REQUIRE(HasExternalWithoutParam< + MockSingleQubitGateWithoutParam>::value); + STATIC_REQUIRE(!HasExternalWithParam< + MockSingleQubitGateWithoutParam>::value); + + STATIC_REQUIRE(!HasInternalWithoutParam< + MockSingleQubitGateWithParam>::value); + STATIC_REQUIRE( + HasInternalWithParam>::value); + STATIC_REQUIRE(!HasExternalWithoutParam< + MockSingleQubitGateWithParam>::value); + STATIC_REQUIRE( + HasExternalWithParam>::value); + + STATIC_REQUIRE(HasInternalWithoutParam< + MockSingleQubitGateSomethingWrong>::value); + STATIC_REQUIRE(!HasInternalWithParam< + MockSingleQubitGateSomethingWrong>::value); + STATIC_REQUIRE(!HasExternalWithoutParam< + MockSingleQubitGateSomethingWrong>::value); + STATIC_REQUIRE(HasExternalWithParam< + MockSingleQubitGateSomethingWrong>::value); +} From 10c295a6b9fc054dd80e770118f24eb5f31e0888 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Tue, 24 Jan 2023 16:43:41 -0500 Subject: [PATCH 06/22] Add tests for AVX2/512 gate helpers --- .../avx_common/SingleQubitGateHelper.hpp | 20 +- .../avx_common/TwoQubitGateHelper.hpp | 185 ++-- pennylane_lightning/src/tests/CMakeLists.txt | 1 + .../tests/Test_AVXSingleQubitGateHelpers.cpp | 12 +- .../src/tests/Test_AVXTwoQubitGateHelpers.cpp | 904 +++++++++++++++++- 5 files changed, 989 insertions(+), 133 deletions(-) diff --git a/pennylane_lightning/src/gates/cpu_kernels/avx_common/SingleQubitGateHelper.hpp b/pennylane_lightning/src/gates/cpu_kernels/avx_common/SingleQubitGateHelper.hpp index b701565b26..d1501e8c30 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/avx_common/SingleQubitGateHelper.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/SingleQubitGateHelper.hpp @@ -94,6 +94,12 @@ InternalFunctions_Iter([[maybe_unused]] std::index_sequence dummy) return std::array{&AVXImpl::template applyInternal...}; } +/** + * @brief Generate an array of function pointers to ``applyInternal`` functions + * with different rev_wires. 
+ * + * @tparam AVXImpl Class implementing AVX2/512 gates without parameters + */ template constexpr auto InternalFunctions() -> decltype(auto) { constexpr size_t internal_wires = @@ -102,6 +108,12 @@ constexpr auto InternalFunctions() -> decltype(auto) { std::make_index_sequence()); } +/** + * @brief Generate an array of function pointers to ``applyInternal`` functions + * with different rev_wires. + * + * @tparam AVXImpl Class implementing AVX2/512 gate with a parameter + */ template constexpr auto InternalFunctions() -> decltype(auto) { constexpr size_t internal_wires = @@ -133,8 +145,8 @@ class SingleQubitGateWithoutParamHelper { : fallback_func_{fallback_func} {} /** - * @brief This function calls corresponding AVX2/512 by finding the correct - * one based on ``wires``. + * @brief This function calls corresponding AVX2/512 kernel functions by + * finding the correct one based on ``wires``. * * @param arr Pointer to a statevector array * @param num_qubits Number of qubits @@ -187,8 +199,8 @@ class SingleQubitGateWithParamHelper { : fallback_func_{fallback_func} {} /** - * @brief This function calls corresponding AVX2/512 by finding the correct - * one based on ``wires``. + * @brief This function calls corresponding AVX2/512 kernel functions by + * finding the correct one based on ``wires``. 
* * @param arr Pointer to a statevector array * @param num_qubits Number of qubits diff --git a/pennylane_lightning/src/gates/cpu_kernels/avx_common/TwoQubitGateHelper.hpp b/pennylane_lightning/src/gates/cpu_kernels/avx_common/TwoQubitGateHelper.hpp index 4678217547..44572a82f6 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/avx_common/TwoQubitGateHelper.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/TwoQubitGateHelper.hpp @@ -27,6 +27,7 @@ #pragma once #include "BitUtil.hpp" #include "ConstantUtil.hpp" +#include "Error.hpp" #include "TypeTraits.hpp" #include @@ -135,49 +136,63 @@ concept TwoQubitGateWithoutParam = SymmetricTwoQubitGateWithoutParam || AsymmetricTwoQubitGateWithoutParam; namespace Internal { -// InternalInternal -template -constexpr auto InternalInternalFunctions_IterTargets( - [[maybe_unused]] std::index_sequence dummy) { - return std::array{ - &AVXImpl::template applyInternalInternal...}; -} - -template -constexpr auto InternalInternalFunctions_IterTargets( - [[maybe_unused]] std::index_sequence dummy) { - return std::array{ - &AVXImpl::template applyInternalInternal...}; -} - -template +// InternalInternal for two qubit gates with param begin +template constexpr auto InternalInternalFunctions_IterTargets( [[maybe_unused]] std::index_sequence dummy) { return std::array{&AVXImpl::template applyInternalInternal< std::min(control, target), std::max(control, target), ParamT>...}; } - -template +template constexpr auto InternalInternalFunctions_IterTargets( [[maybe_unused]] std::index_sequence dummy) { return std::array{ &AVXImpl::template applyInternalInternal...}; } - template constexpr auto InternalInternalFunctions_Iter( [[maybe_unused]] std::index_sequence dummy) { constexpr size_t internal_wires = Util::log2PerfectPower(AVXImpl::packed_size_ / 2); return Util::tuple_to_array(std::tuple{ - InternalInternalFunctions_IterTargets( + InternalInternalFunctions_IterTargets( std::make_index_sequence())...}); } +/** + * @brief 
Generate an array of function pointers + * to ``applyInternalInternal`` functions with different internal (control and + * target) wires. + * + * @tparam AVXImpl Class implementing AVX2/512 gates which are symmetric and + * with a parameter + */ +template +constexpr auto InternalInternalFunctions() { + constexpr size_t internal_wires = + Util::log2PerfectPower(AVXImpl::packed_size_ / 2); + return InternalInternalFunctions_Iter( + std::make_index_sequence()); +} +// InternalInternal for two qubit gates with param end + +// InternalInternal for two qubit gates without param start +template +constexpr auto InternalInternalFunctions_IterTargets( + [[maybe_unused]] std::index_sequence dummy) { + return std::array{ + &AVXImpl::template applyInternalInternal...}; +} +template +constexpr auto InternalInternalFunctions_IterTargets( + [[maybe_unused]] std::index_sequence dummy) { + return std::array{ + &AVXImpl::template applyInternalInternal...}; +} template constexpr auto InternalInternalFunctions_Iter( @@ -189,14 +204,14 @@ constexpr auto InternalInternalFunctions_Iter( std::make_index_sequence())...}); } -template -constexpr auto InternalInternalFunctions() -> decltype(auto) { - constexpr size_t internal_wires = - Util::log2PerfectPower(AVXImpl::packed_size_ / 2); - return InternalInternalFunctions_Iter( - std::make_index_sequence()); -} - +/** + * @brief Generate an array of function pointers + * to ``applyInternalInternal`` functions with different internal (control and + * target) wires. 
+ * + * @tparam AVXImpl Class implementing AVX2/512 gates which are symmetric and + * without parameters + */ template constexpr auto InternalInternalFunctions() -> decltype(auto) { constexpr size_t internal_wires = @@ -205,7 +220,9 @@ constexpr auto InternalInternalFunctions() -> decltype(auto) { std::make_index_sequence()); } -// Asymmetric two qubit gate without param begin +// InternalInternal for two qubit gates without param end + +// ExternalInternal for two qubit gates without param start template constexpr auto ExternalInternalFunctions_Iter( [[maybe_unused]] std::index_sequence dummy) -> decltype(auto) { @@ -213,6 +230,14 @@ constexpr auto ExternalInternalFunctions_Iter( std::tuple{&AVXImpl::template applyExternalInternal...}); } +/** + * @brief Generate an array of function pointers to ``applyExternalInternal`` + * functions with different internal (target) wires. Note that + * ``applyExternalInternal`` functions are only defined for asymmetric gates. + * + * @tparam AVXImpl Class implementing AVX2/512 gates which are symmetric and + * without parameters + */ template constexpr auto ExternalInternalFunctions() -> decltype(auto) { constexpr size_t internal_wires = @@ -220,6 +245,9 @@ constexpr auto ExternalInternalFunctions() -> decltype(auto) { return ExternalInternalFunctions_Iter( std::make_index_sequence()); } +// ExternalInternal for two qubit gate without param end + +// ExternalInternal for two qubit gates with param start template constexpr auto ExternalInternalFunctions_Iter( @@ -228,6 +256,15 @@ constexpr auto ExternalInternalFunctions_Iter( &AVXImpl::template applyExternalInternal...}); } +/** + * @brief Generate an array of function pointers to ``applyExternalInternal`` + * functions with different internal (target) wires. Note that + * ``applyExternalInternal`` functions are only defined for asymmetric gates. 
+ * + * @tparam AVXImpl Class implementing AVX2/512 gates which are symmetric and + * with a parameter + * @tparam ParamT Gate parameter type + */ template constexpr auto ExternalInternalFunctions() -> decltype(auto) { constexpr size_t internal_wires = @@ -235,7 +272,9 @@ constexpr auto ExternalInternalFunctions() -> decltype(auto) { return ExternalInternalFunctions_Iter( std::make_index_sequence()); } +// ExternalInternal for two qubit gate with param end +// InternalExternal for two qubit gates without param begin template constexpr auto InternalExternalFunctions_Iter( [[maybe_unused]] std::index_sequence dummy) -> decltype(auto) { @@ -249,7 +288,9 @@ constexpr auto InternalExternalFunctions() -> decltype(auto) { return InternalExternalFunctions_Iter( std::make_index_sequence()); } +// InternalExternal for two qubit gates without param end +// InternalExternal for two qubit gates with param start template constexpr auto InternalExternalFunctions_Iter( [[maybe_unused]] std::index_sequence dummy) -> decltype(auto) { @@ -257,6 +298,13 @@ constexpr auto InternalExternalFunctions_Iter( &AVXImpl::template applyInternalExternal...}; } +/** + * @brief Generate an array of function pointers + * to ``applyInternalExternal`` functions with different internal wires. 
+ * + * @tparam AVXImpl Class implementing AVX2/512 gates which are symmetric and + * with a parameter + */ template constexpr auto InternalExternalFunctions() -> decltype(auto) { constexpr size_t internal_wires = @@ -264,58 +312,11 @@ constexpr auto InternalExternalFunctions() -> decltype(auto) { return InternalExternalFunctions_Iter( std::make_index_sequence()); } -// Symmetric two qubit gate without param end -// Symmetric two qubit gate with param begin -template -constexpr auto InternalInternalFunctions_IterTargets( - [[maybe_unused]] std::index_sequence dummy) { - return std::array{&AVXImpl::template applyInternalInternal< - std::min(control, target), std::max(control, target), ParamT>...}; -} - -template -constexpr auto InternalInternalFunctions_Iter( - [[maybe_unused]] std::index_sequence dummy) { - constexpr size_t internal_wires = - Util::log2PerfectPower(AVXImpl::packed_size_ / 2); - return Util::tuple_to_array(std::tuple{ - InternalInternalFunctions_IterTargets( - std::make_index_sequence())...}); -} - -template -constexpr auto InternalInternalFunctions() { - constexpr size_t internal_wires = - Util::log2PerfectPower(AVXImpl::packed_size_ / 2); - return InternalInternalFunctions_Iter( - std::make_index_sequence()); -} +// InternalExternal for two qubit gates with param end -template -constexpr auto InternalExternalFunctions_Iter( - [[maybe_unused]] std::index_sequence dummy) { - return std::array{ - &AVXImpl::template applyInternalExternal...}; -} - -template -constexpr auto InternalExternalFunctions() { - constexpr size_t internal_wires = - Util::log2PerfectPower(AVXImpl::packed_size_ / 2); - return InternalExternalFunctions_Iter( - std::make_index_sequence()); -} -// Symmetric two qubit gate with param end } // namespace Internal /// @endcond -template class TwoQubitGateWithParamHelper { - static_assert(sizeof(T) == -1, "Only specialized template can be used."); -}; - /** * @brief A helper class for two-qubit gate without parameters. 
*/ @@ -338,8 +339,9 @@ class TwoQubitGateWithoutParamHelper { : fallback_func_{fallback_func} {} /** - * @brief A specialized function for symmetric two-qubit gates (control and - * target wires are symmetric). + * @brief A specialization for symmetric two-qubit gates (control and + * target wires are symmetric), which calls the correct AVX2/512 kernel + * functions based on ``wires``. * * @param arr Pointer to a statevector array * @param num_qubits Number of qubits @@ -384,7 +386,9 @@ class TwoQubitGateWithoutParamHelper { } /** - * @brief A specialized function for asymmetric two-qubit gates. + * @brief A specialization for asymmetric two-qubit gates (control and + * target wires are asymmetric), which calls the correct AVX2/512 kernel + * functions based on ``wires``. * * @param arr Pointer to a statevector array * @param num_qubits Number of qubits @@ -439,7 +443,7 @@ class TwoQubitGateWithoutParamHelper { */ template requires TwoQubitGateWithParam -class TwoQubitGateWithParamHelper { +class TwoQubitGateWithParamHelper { public: using Precision = typename AVXImpl::Precision; using ReturnType = typename Util::FuncReturn< @@ -456,8 +460,9 @@ class TwoQubitGateWithParamHelper { : fallback_func_{fallback_func} {} /** - * @brief A specialized function for symmetric two-qubit gates (control and - * target wires are symmetric). + * @brief A specialization for symmetric two-qubit gates (control and + * target wires are symmetric), which calls the correct AVX2/512 kernel + * functions based on ``wires``. * * @param arr Pointer to a statevector array * @param num_qubits Number of qubits @@ -503,7 +508,9 @@ class TwoQubitGateWithParamHelper { } /** - * @brief A specialized function for asymmetric two-qubit gates. + * @brief A specialization for asymmetric two-qubit gates (control and + * target wires are asymmetric), which calls the correct AVX2/512 kernel + * functions based on ``wires``. 
* * @param arr Pointer to a statevector array * @param num_qubits Number of qubits diff --git a/pennylane_lightning/src/tests/CMakeLists.txt b/pennylane_lightning/src/tests/CMakeLists.txt index b5216c181a..e904e1336c 100644 --- a/pennylane_lightning/src/tests/CMakeLists.txt +++ b/pennylane_lightning/src/tests/CMakeLists.txt @@ -70,6 +70,7 @@ set(TEST_SOURCES CreateAllWires.cpp Test_AdjDiff.cpp Test_AlgUtil.cpp Test_AVXSingleQubitGateHelpers.cpp + Test_AVXTwoQubitGateHelpers.cpp #Test_Bindings.cpp Test_CompilerSupport.cpp Test_DynamicDispatcher.cpp diff --git a/pennylane_lightning/src/tests/Test_AVXSingleQubitGateHelpers.cpp b/pennylane_lightning/src/tests/Test_AVXSingleQubitGateHelpers.cpp index 8478f7c763..41f78c7d62 100644 --- a/pennylane_lightning/src/tests/Test_AVXSingleQubitGateHelpers.cpp +++ b/pennylane_lightning/src/tests/Test_AVXSingleQubitGateHelpers.cpp @@ -143,7 +143,7 @@ TEMPLATE_TEST_CASE("Test SingleQubitGateWithoutParamHelper", MockSingleQubitGateWithoutParam> func(fallback); // We pack 4 real numbers -> 2 complex numbers -> single qubit. - // Thus only `rev_wire=0` calls the internal functions. + // Thus only rev_wire = 0 calls the internal functions. for (bool inverse : {false, true}) { { // num_qubits= 4, wires = {0} -> rev_wires = 3 @@ -187,8 +187,8 @@ TEMPLATE_TEST_CASE("Test SingleQubitGateWithoutParamHelper", SingleQubitGateWithoutParamHelper< MockSingleQubitGateWithoutParam> func(fallback); - // We pack 8 real numbers -> 4 complex numbers -> two qubit. - // Thus `rev_wire=0,1` calls the internal functions. + // We pack 8 real numbers -> 4 complex numbers -> two qubits. + // Thus rev_wire = 0 or 1 calls the internal functions. for (bool inverse : {false, true}) { { // num_qubits= 4, wires = {0} -> rev_wires = 3 @@ -250,7 +250,7 @@ TEMPLATE_TEST_CASE("Test SingleQubitGateWithParamHelper", MockSingleQubitGateWithParam, TestType> func(fallback); // We pack 4 real numbers -> 2 complex numbers -> single qubit. 
- // Thus only `rev_wire=0` calls the internal functions. + // Thus only rev_wire = 0 calls the internal functions. TestType angle = 0.312; @@ -296,8 +296,8 @@ TEMPLATE_TEST_CASE("Test SingleQubitGateWithParamHelper", SingleQubitGateWithParamHelper< MockSingleQubitGateWithParam, TestType> func(fallback); - // We pack 8 real numbers -> 4 complex numbers -> two qubit. - // Thus `rev_wire=0,1` calls the internal functions. + // We pack 8 real numbers -> 4 complex numbers -> two qubits. + // Thus rev_wire = 0 or 1 calls the internal functions. TestType angle = 0.312; diff --git a/pennylane_lightning/src/tests/Test_AVXTwoQubitGateHelpers.cpp b/pennylane_lightning/src/tests/Test_AVXTwoQubitGateHelpers.cpp index cfa49f883e..c9119a970a 100644 --- a/pennylane_lightning/src/tests/Test_AVXTwoQubitGateHelpers.cpp +++ b/pennylane_lightning/src/tests/Test_AVXTwoQubitGateHelpers.cpp @@ -4,6 +4,11 @@ using namespace Pennylane::Gates::AVXCommon; +/** + * Define mock classes. For symmetric gate, we do not have + * ``applyExternalInternal`` member function. 
+ */ + template struct MockSymmetricTwoQubitGateWithoutParam { using Precision = PrecisionT; @@ -17,46 +22,877 @@ struct MockSymmetricTwoQubitGateWithoutParam { static_cast(arr); static_cast(num_qubits); static_cast(inverse); - return {"applyInternal", rev_wire, inverse}; + return {"applyInternalInternal", rev_wire0, rev_wire1, inverse}; + } + + template + static std::tuple + applyInternalExternal(std::complex *arr, + const size_t num_qubits, size_t rev_wire1, + bool inverse) { + static_cast(arr); + static_cast(num_qubits); + static_cast(inverse); + return {"applyInternalExternal", rev_wire0, rev_wire1, inverse}; + } + + static std::tuple + applyExternalExternal(std::complex *arr, + const size_t num_qubits, size_t rev_wire0, + size_t rev_wire1, bool inverse) { + static_cast(arr); + static_cast(num_qubits); + static_cast(inverse); + return {"applyExternalExternal", rev_wire0, rev_wire1, inverse}; + } +}; + +template +struct MockSymmetricTwoQubitGateWithParam { + using Precision = PrecisionT; + constexpr static size_t packed_size_ = packed_size; + constexpr static bool symmetric = true; + + template + static std::tuple + applyInternalInternal(std::complex *arr, + const size_t num_qubits, bool inverse, ParamT angle) { + static_cast(arr); + static_cast(num_qubits); + static_cast(inverse); + static_cast(angle); + return {"applyInternalInternal", rev_wire0, rev_wire1, inverse}; + } + + template + static std::tuple + applyInternalExternal(std::complex *arr, + const size_t num_qubits, size_t rev_wire1, + bool inverse, ParamT angle) { + static_cast(arr); + static_cast(num_qubits); + static_cast(inverse); + static_cast(angle); + return {"applyInternalExternal", rev_wire0, rev_wire1, inverse}; + } + + template + static std::tuple + applyExternalExternal(std::complex *arr, + const size_t num_qubits, size_t rev_wire0, + size_t rev_wire1, bool inverse, ParamT angle) { + static_cast(arr); + static_cast(num_qubits); + static_cast(inverse); + static_cast(angle); + return 
{"applyExternalExternal", rev_wire0, rev_wire1, inverse}; + } +}; + +template +struct MockAsymmetricTwoQubitGateWithoutParam { + using Precision = PrecisionT; + constexpr static size_t packed_size_ = packed_size; + constexpr static bool symmetric = false; + + template + static std::tuple + applyInternalInternal(std::complex *arr, + const size_t num_qubits, bool inverse) { + static_cast(arr); + static_cast(num_qubits); + static_cast(inverse); + return {"applyInternalInternal", rev_wire0, rev_wire1, inverse}; } - static std::tuple - applyExternal(std::complex *arr, const size_t num_qubits, - const size_t rev_wire, bool inverse) { + template + static std::tuple + applyInternalExternal(std::complex *arr, + const size_t num_qubits, size_t target, + bool inverse) { static_cast(arr); static_cast(num_qubits); - static_cast(rev_wire); static_cast(inverse); - return {"applyExternal", rev_wire, inverse}; + return {"applyInternalExternal", control, target, inverse}; + } + + template + static std::tuple + applyExternalInternal(std::complex *arr, + const size_t num_qubits, size_t control, + bool inverse) { + static_cast(arr); + static_cast(num_qubits); + static_cast(inverse); + return {"applyExternalInternal", control, target, inverse}; + } + + static std::tuple + applyExternalExternal(std::complex *arr, + const size_t num_qubits, size_t rev_wire0, + size_t rev_wire1, bool inverse) { + static_cast(arr); + static_cast(num_qubits); + static_cast(inverse); + return {"applyExternalExternal", rev_wire0, rev_wire1, inverse}; } }; -TEMPLATE_TEST_CASE("Test SingleQubitGateHelper template functions", - "[SingleQubitGateHelper]", float, double) { - STATIC_REQUIRE(HasInternalWithoutParam< - MockSingleQubitGateWithoutParam>::value); - STATIC_REQUIRE(!HasInternalWithParam< - MockSingleQubitGateWithoutParam>::value); - STATIC_REQUIRE(HasExternalWithoutParam< - MockSingleQubitGateWithoutParam>::value); - STATIC_REQUIRE(!HasExternalWithParam< - MockSingleQubitGateWithoutParam>::value); - - 
STATIC_REQUIRE(!HasInternalWithoutParam< - MockSingleQubitGateWithParam>::value); - STATIC_REQUIRE( - HasInternalWithParam>::value); - STATIC_REQUIRE(!HasExternalWithoutParam< - MockSingleQubitGateWithParam>::value); - STATIC_REQUIRE( - HasExternalWithParam>::value); - - STATIC_REQUIRE(HasInternalWithoutParam< - MockSingleQubitGateSomethingWrong>::value); - STATIC_REQUIRE(!HasInternalWithParam< - MockSingleQubitGateSomethingWrong>::value); - STATIC_REQUIRE(!HasExternalWithoutParam< - MockSingleQubitGateSomethingWrong>::value); - STATIC_REQUIRE(HasExternalWithParam< - MockSingleQubitGateSomethingWrong>::value); +template +struct MockAsymmetricTwoQubitGateWithParam { + using Precision = PrecisionT; + constexpr static size_t packed_size_ = packed_size; + constexpr static bool symmetric = false; + + template + static std::tuple + applyInternalInternal(std::complex *arr, + const size_t num_qubits, bool inverse, ParamT angle) { + static_cast(arr); + static_cast(num_qubits); + static_cast(inverse); + static_cast(angle); + return {"applyInternalInternal", rev_wire0, rev_wire1, inverse}; + } + + template + static std::tuple + applyInternalExternal(std::complex *arr, + const size_t num_qubits, size_t target, bool inverse, + ParamT angle) { + static_cast(arr); + static_cast(num_qubits); + static_cast(inverse); + static_cast(angle); + return {"applyInternalExternal", control, target, inverse}; + } + + template + static std::tuple + applyExternalInternal(std::complex *arr, + const size_t num_qubits, size_t control, bool inverse, + ParamT angle) { + static_cast(arr); + static_cast(num_qubits); + static_cast(inverse); + static_cast(angle); + return {"applyExternalInternal", control, target, inverse}; + } + + template + static std::tuple + applyExternalExternal(std::complex *arr, + const size_t num_qubits, size_t rev_wire0, + size_t rev_wire1, bool inverse, ParamT angle) { + static_cast(arr); + static_cast(num_qubits); + static_cast(inverse); + static_cast(angle); + return 
{"applyExternalExternal", rev_wire0, rev_wire1, inverse}; + } +}; + +TEMPLATE_TEST_CASE("Test TwoQubitGateHelper template functions", + "[TwoQubitGateHelper]", float, double) { + // Template functions detecting existing functions without params + STATIC_REQUIRE(HasInternalInternalWithoutParam< + MockSymmetricTwoQubitGateWithoutParam>::value); + STATIC_REQUIRE(HasInternalInternalWithoutParam< + MockAsymmetricTwoQubitGateWithoutParam>::value); + STATIC_REQUIRE(!HasInternalInternalWithoutParam< + MockSymmetricTwoQubitGateWithParam>::value); + STATIC_REQUIRE(!HasInternalInternalWithoutParam< + MockAsymmetricTwoQubitGateWithParam>::value); + + STATIC_REQUIRE(HasInternalExternalWithoutParam< + MockSymmetricTwoQubitGateWithoutParam>::value); + STATIC_REQUIRE(HasInternalExternalWithoutParam< + MockAsymmetricTwoQubitGateWithoutParam>::value); + STATIC_REQUIRE(!HasInternalExternalWithoutParam< + MockSymmetricTwoQubitGateWithParam>::value); + STATIC_REQUIRE(!HasInternalExternalWithoutParam< + MockAsymmetricTwoQubitGateWithParam>::value); + + STATIC_REQUIRE(!HasExternalInternalWithoutParam< + MockSymmetricTwoQubitGateWithoutParam>::value); + STATIC_REQUIRE(HasExternalInternalWithoutParam< + MockAsymmetricTwoQubitGateWithoutParam>::value); + STATIC_REQUIRE(!HasExternalInternalWithoutParam< + MockSymmetricTwoQubitGateWithParam>::value); + STATIC_REQUIRE(!HasExternalInternalWithoutParam< + MockAsymmetricTwoQubitGateWithParam>::value); + + STATIC_REQUIRE(HasExternalExternalWithoutParam< + MockSymmetricTwoQubitGateWithoutParam>::value); + STATIC_REQUIRE(HasExternalExternalWithoutParam< + MockAsymmetricTwoQubitGateWithoutParam>::value); + STATIC_REQUIRE(!HasExternalExternalWithoutParam< + MockSymmetricTwoQubitGateWithParam>::value); + STATIC_REQUIRE(!HasExternalExternalWithoutParam< + MockAsymmetricTwoQubitGateWithParam>::value); + + // Template functions detecting existing functions with params + STATIC_REQUIRE(!HasInternalInternalWithParam< + 
MockSymmetricTwoQubitGateWithoutParam>::value); + STATIC_REQUIRE(!HasInternalInternalWithParam< + MockAsymmetricTwoQubitGateWithoutParam>::value); + STATIC_REQUIRE(HasInternalInternalWithParam< + MockSymmetricTwoQubitGateWithParam>::value); + STATIC_REQUIRE(HasInternalInternalWithParam< + MockAsymmetricTwoQubitGateWithParam>::value); + + STATIC_REQUIRE(!HasInternalExternalWithParam< + MockSymmetricTwoQubitGateWithoutParam>::value); + STATIC_REQUIRE(!HasInternalExternalWithParam< + MockAsymmetricTwoQubitGateWithoutParam>::value); + STATIC_REQUIRE(HasInternalExternalWithParam< + MockSymmetricTwoQubitGateWithParam>::value); + STATIC_REQUIRE(HasInternalExternalWithParam< + MockAsymmetricTwoQubitGateWithParam>::value); + + STATIC_REQUIRE(!HasExternalInternalWithParam< + MockSymmetricTwoQubitGateWithoutParam>::value); + STATIC_REQUIRE(!HasExternalInternalWithParam< + MockAsymmetricTwoQubitGateWithoutParam>::value); + STATIC_REQUIRE(!HasExternalInternalWithParam< + MockSymmetricTwoQubitGateWithParam>::value); + STATIC_REQUIRE(HasExternalInternalWithParam< + MockAsymmetricTwoQubitGateWithParam>::value); + + STATIC_REQUIRE(!HasExternalExternalWithParam< + MockSymmetricTwoQubitGateWithoutParam>::value); + STATIC_REQUIRE(!HasExternalExternalWithParam< + MockAsymmetricTwoQubitGateWithoutParam>::value); + STATIC_REQUIRE(HasExternalExternalWithParam< + MockSymmetricTwoQubitGateWithParam>::value); + STATIC_REQUIRE(HasExternalExternalWithParam< + MockAsymmetricTwoQubitGateWithParam>::value); + + // Test concepts + STATIC_REQUIRE(!SymmetricTwoQubitGateWithParam< + MockSymmetricTwoQubitGateWithoutParam>); + STATIC_REQUIRE(!SymmetricTwoQubitGateWithParam< + MockAsymmetricTwoQubitGateWithoutParam>); + STATIC_REQUIRE(SymmetricTwoQubitGateWithParam< + MockSymmetricTwoQubitGateWithParam>); + STATIC_REQUIRE(!SymmetricTwoQubitGateWithParam< + MockAsymmetricTwoQubitGateWithParam>); + + STATIC_REQUIRE(!AsymmetricTwoQubitGateWithParam< + MockSymmetricTwoQubitGateWithoutParam>); + 
STATIC_REQUIRE(!AsymmetricTwoQubitGateWithParam< + MockAsymmetricTwoQubitGateWithoutParam>); + STATIC_REQUIRE(!AsymmetricTwoQubitGateWithParam< + MockSymmetricTwoQubitGateWithParam>); + STATIC_REQUIRE(AsymmetricTwoQubitGateWithParam< + MockAsymmetricTwoQubitGateWithParam>); + + STATIC_REQUIRE(SymmetricTwoQubitGateWithoutParam< + MockSymmetricTwoQubitGateWithoutParam>); + STATIC_REQUIRE(!SymmetricTwoQubitGateWithoutParam< + MockAsymmetricTwoQubitGateWithoutParam>); + STATIC_REQUIRE(!SymmetricTwoQubitGateWithoutParam< + MockSymmetricTwoQubitGateWithParam>); + STATIC_REQUIRE(!SymmetricTwoQubitGateWithoutParam< + MockAsymmetricTwoQubitGateWithParam>); + + STATIC_REQUIRE(SymmetricTwoQubitGateWithoutParam< + MockSymmetricTwoQubitGateWithoutParam>); + STATIC_REQUIRE(!SymmetricTwoQubitGateWithoutParam< + MockAsymmetricTwoQubitGateWithoutParam>); + STATIC_REQUIRE(!SymmetricTwoQubitGateWithoutParam< + MockSymmetricTwoQubitGateWithParam>); + STATIC_REQUIRE(!SymmetricTwoQubitGateWithoutParam< + MockAsymmetricTwoQubitGateWithParam>); +} + +std::pair sort(size_t a, size_t b) { + return {std::min(a, b), std::max(a, b)}; +} + +TEMPLATE_TEST_CASE("Test TwoQubitGateWithoutParamHelper", + "[TwoQubitGateHelper]", float, double) { + auto fallback = + [](std::complex *arr, size_t num_qubits, + const std::vector &wires, + bool inverse) -> std::tuple { + static_cast(arr); + return {"fallback", num_qubits - wires[0] - 1, + num_qubits - wires[1] - 1, inverse}; + }; + + SECTION("Test TwoQubitGateWithoutParamHelper for symmetric gates with " + "packed_size = 8") { + constexpr size_t packed_size = 8; + std::vector> arr( + 16, std::complex{0.0, 0.0}); + TwoQubitGateWithoutParamHelper< + MockSymmetricTwoQubitGateWithoutParam> + func(fallback); + + // We pack 8 real numbers -> 4 complex numbers -> two qubits + // rev_wire in {0, 1} is internal + + for (bool inverse : {false, true}) { + { // num_qubits = 4, wires = {0, 1} -> rev_wires = {3, 2} + const auto res = func(arr.data(), 4, {0, 1}, 
inverse); + REQUIRE(std::get<0>(res) == + std::string("applyExternalExternal")); + REQUIRE(sort(std::get<1>(res), std::get<2>(res)) == + std::pair{2, 3}); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {1, 0} -> rev_wires = {2, 3} + const auto res = func(arr.data(), 4, {1, 0}, inverse); + REQUIRE(std::get<0>(res) == + std::string("applyExternalExternal")); + REQUIRE(sort(std::get<1>(res), std::get<2>(res)) == + std::pair{2, 3}); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {0, 3} -> rev_wires = {0, 3} + const auto res = func(arr.data(), 4, {0, 3}, inverse); + REQUIRE(std::get<0>(res) == + std::string("applyInternalExternal")); + REQUIRE(sort(std::get<1>(res), std::get<2>(res)) == + std::pair{0, 3}); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {3, 0} -> rev_wires = {3, 0} + const auto res = func(arr.data(), 4, {3, 0}, inverse); + REQUIRE(std::get<0>(res) == + std::string("applyInternalExternal")); + REQUIRE(sort(std::get<1>(res), std::get<2>(res)) == + std::pair{0, 3}); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {2, 3} -> rev_wires = {0, 1} + const auto res = func(arr.data(), 4, {2, 3}, inverse); + REQUIRE(std::get<0>(res) == + std::string("applyInternalInternal")); + REQUIRE(sort(std::get<1>(res), std::get<2>(res)) == + std::pair{0, 1}); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {3, 2} -> rev_wires = {1, 0} + const auto res = func(arr.data(), 4, {3, 2}, inverse); + REQUIRE(std::get<0>(res) == + std::string("applyInternalInternal")); + REQUIRE(sort(std::get<1>(res), std::get<2>(res)) == + std::pair{0, 1}); + REQUIRE(std::get<3>(res) == inverse); + } + } + } + + SECTION("Test TwoQubitGateWithoutParamHelper for symmetric gates with " + "packed_size = 16") { + constexpr size_t packed_size = 16; + std::vector> arr( + 16, std::complex{0.0, 0.0}); + TwoQubitGateWithoutParamHelper< + MockSymmetricTwoQubitGateWithoutParam> 
+ func(fallback); + + // We pack 16 real numbers -> 8 complex numbers -> three qubits + // rev_wire in {0, 1, 2} is internal + + for (bool inverse : {false, true}) { + { // num_qubits = 4, wires = {0, 1} -> rev_wires = {2, 3} + const auto res = func(arr.data(), 4, {0, 1}, inverse); + REQUIRE(std::get<0>(res) == + std::string("applyInternalExternal")); + REQUIRE(sort(std::get<1>(res), std::get<2>(res)) == + std::pair{2, 3}); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {1, 0} -> rev_wires = {3, 2} + const auto res = func(arr.data(), 4, {1, 0}, inverse); + REQUIRE(std::get<0>(res) == + std::string("applyInternalExternal")); + REQUIRE(sort(std::get<1>(res), std::get<2>(res)) == + std::pair{2, 3}); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {0, 3} -> rev_wires = {0, 3} + const auto res = func(arr.data(), 4, {0, 3}, inverse); + REQUIRE(std::get<0>(res) == + std::string("applyInternalExternal")); + REQUIRE(sort(std::get<1>(res), std::get<2>(res)) == + std::pair{0, 3}); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {3, 0} -> rev_wires = {3, 0} + const auto res = func(arr.data(), 4, {3, 0}, inverse); + REQUIRE(std::get<0>(res) == + std::string("applyInternalExternal")); + REQUIRE(sort(std::get<1>(res), std::get<2>(res)) == + std::pair{0, 3}); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {1, 3} -> rev_wires = {0, 2} + const auto res = func(arr.data(), 4, {1, 3}, inverse); + REQUIRE(std::get<0>(res) == + std::string("applyInternalInternal")); + REQUIRE(sort(std::get<1>(res), std::get<2>(res)) == + std::pair{0, 2}); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {3, 1} -> rev_wires = {0, 2} + const auto res = func(arr.data(), 4, {3, 1}, inverse); + REQUIRE(std::get<0>(res) == + std::string("applyInternalInternal")); + REQUIRE(sort(std::get<1>(res), std::get<2>(res)) == + std::pair{0, 2}); + REQUIRE(std::get<3>(res) == inverse); + 
} + { // num_qubits = 2, wires = {0, 1} -> fallback + const auto res = func(arr.data(), 2, {0, 1}, inverse); + REQUIRE(std::get<0>(res) == std::string("fallback")); + REQUIRE(std::get<1>(res) == 1); + REQUIRE(std::get<2>(res) == 0); + REQUIRE(std::get<3>(res) == inverse); + } + } + } + + SECTION("Test TwoQubitGateWithoutParamHelper for asymmetric gates with " + "packed_size = 8") { + constexpr size_t packed_size = 8; + std::vector> arr( + 16, std::complex{0.0, 0.0}); + TwoQubitGateWithoutParamHelper< + MockAsymmetricTwoQubitGateWithoutParam> + func(fallback); + + // We pack 8 real numbers -> 4 complex numbers -> two qubits + // rev_wire in {0, 1} is internal + // The second wire is the target wire + + for (bool inverse : {false, true}) { + { // num_qubits = 4, wires = {0, 1} -> rev_wires = {3, 2} + const auto res = func(arr.data(), 4, {0, 1}, inverse); + REQUIRE(std::get<0>(res) == + std::string("applyExternalExternal")); + REQUIRE(std::get<1>(res) == 3); + REQUIRE(std::get<2>(res) == 2); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {1, 0} -> rev_wires = {2, 3} + const auto res = func(arr.data(), 4, {1, 0}, inverse); + REQUIRE(std::get<0>(res) == + std::string("applyExternalExternal")); + REQUIRE(std::get<1>(res) == 2); + REQUIRE(std::get<2>(res) == 3); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {0, 3} -> rev_wires = {3, 0} + const auto res = func(arr.data(), 4, {0, 3}, inverse); + REQUIRE(std::get<0>(res) == + std::string("applyExternalInternal")); + REQUIRE(std::get<1>(res) == 3); + REQUIRE(std::get<2>(res) == 0); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {3, 0} -> rev_wires = {0, 3} + const auto res = func(arr.data(), 4, {3, 0}, inverse); + REQUIRE(std::get<0>(res) == + std::string("applyInternalExternal")); + REQUIRE(std::get<1>(res) == 0); + REQUIRE(std::get<2>(res) == 3); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {2, 3} -> rev_wires 
= {1, 0} + const auto res = func(arr.data(), 4, {2, 3}, inverse); + REQUIRE(std::get<0>(res) == + std::string("applyInternalInternal")); + REQUIRE(std::get<1>(res) == 1); + REQUIRE(std::get<2>(res) == 0); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {3, 2} -> rev_wires = {0, 1} + const auto res = func(arr.data(), 4, {3, 2}, inverse); + REQUIRE(std::get<0>(res) == + std::string("applyInternalInternal")); + REQUIRE(std::get<1>(res) == 0); + REQUIRE(std::get<2>(res) == 1); + REQUIRE(std::get<3>(res) == inverse); + } + } + } + + SECTION("Test TwoQubitGateWithoutParamHelper for asymmetric gates with " + "packed_size = 16") { + constexpr size_t packed_size = 16; + std::vector> arr( + 16, std::complex{0.0, 0.0}); + TwoQubitGateWithoutParamHelper< + MockAsymmetricTwoQubitGateWithoutParam> + func(fallback); + + // We pack 16 real numbers -> 8 complex numbers -> three qubits + // rev_wire in {0, 1, 2} is internal + // The second wire is the target wire + + for (bool inverse : {false, true}) { + { // num_qubits = 4, wires = {0, 1} -> rev_wires = {3, 2} + const auto res = func(arr.data(), 4, {0, 1}, inverse); + REQUIRE(std::get<0>(res) == + std::string("applyExternalInternal")); + REQUIRE(std::get<1>(res) == 3); + REQUIRE(std::get<2>(res) == 2); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {1, 0} -> rev_wires = {2, 3} + const auto res = func(arr.data(), 4, {1, 0}, inverse); + REQUIRE(std::get<0>(res) == + std::string("applyInternalExternal")); + REQUIRE(std::get<1>(res) == 2); + REQUIRE(std::get<2>(res) == 3); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {0, 3} -> rev_wires = {3, 0} + const auto res = func(arr.data(), 4, {0, 3}, inverse); + REQUIRE(std::get<0>(res) == + std::string("applyExternalInternal")); + REQUIRE(std::get<1>(res) == 3); + REQUIRE(std::get<2>(res) == 0); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {3, 0} -> rev_wires = {0, 3} + const 
auto res = func(arr.data(), 4, {3, 0}, inverse); + REQUIRE(std::get<0>(res) == + std::string("applyInternalExternal")); + REQUIRE(std::get<1>(res) == 0); + REQUIRE(std::get<2>(res) == 3); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {2, 3} -> rev_wires = {1, 0} + const auto res = func(arr.data(), 4, {2, 3}, inverse); + REQUIRE(std::get<0>(res) == + std::string("applyInternalInternal")); + REQUIRE(std::get<1>(res) == 1); + REQUIRE(std::get<2>(res) == 0); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {3, 2} -> rev_wires = {0, 1} + const auto res = func(arr.data(), 4, {3, 2}, inverse); + REQUIRE(std::get<0>(res) == + std::string("applyInternalInternal")); + REQUIRE(std::get<1>(res) == 0); + REQUIRE(std::get<2>(res) == 1); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 2, wires = {0, 1} -> fallback + const auto res = func(arr.data(), 2, {0, 1}, inverse); + REQUIRE(std::get<0>(res) == std::string("fallback")); + REQUIRE(std::get<1>(res) == 1); + REQUIRE(std::get<2>(res) == 0); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 2, wires = {1, 0} -> fallback + const auto res = func(arr.data(), 2, {1, 0}, inverse); + REQUIRE(std::get<0>(res) == std::string("fallback")); + REQUIRE(std::get<1>(res) == 0); + REQUIRE(std::get<2>(res) == 1); + REQUIRE(std::get<3>(res) == inverse); + } + } + } +} + +TEMPLATE_TEST_CASE("Test TwoQubitGateWithParamHelper", "[TwoQubitGateHelper]", + float, double) { + auto fallback = + [](std::complex *arr, size_t num_qubits, + const std::vector &wires, bool inverse, + TestType angle) -> std::tuple { + static_cast(arr); + static_cast(angle); + return {"fallback", num_qubits - wires[0] - 1, + num_qubits - wires[1] - 1, inverse}; + }; + + const TestType angle = static_cast(0.312); + + SECTION("Test TwoQubitGateWithParamHelper for symmetric gates with " + "packed_size = 8") { + constexpr size_t packed_size = 8; + std::vector> arr( + 16, std::complex{0.0, 0.0}); + 
TwoQubitGateWithParamHelper< + MockSymmetricTwoQubitGateWithParam, TestType> + func(fallback); + + // We pack 8 real numbers -> 4 complex numbers -> two qubits + // rev_wire in {0, 1} is internal + + for (bool inverse : {false, true}) { + { // num_qubits = 4, wires = {0, 1} -> rev_wires = {3, 2} + const auto res = func(arr.data(), 4, {0, 1}, inverse, angle); + REQUIRE(std::get<0>(res) == + std::string("applyExternalExternal")); + REQUIRE(sort(std::get<1>(res), std::get<2>(res)) == + std::pair{2, 3}); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {1, 0} -> rev_wires = {2, 3} + const auto res = func(arr.data(), 4, {1, 0}, inverse, angle); + REQUIRE(std::get<0>(res) == + std::string("applyExternalExternal")); + REQUIRE(sort(std::get<1>(res), std::get<2>(res)) == + std::pair{2, 3}); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {0, 3} -> rev_wires = {0, 3} + const auto res = func(arr.data(), 4, {0, 3}, inverse, angle); + REQUIRE(std::get<0>(res) == + std::string("applyInternalExternal")); + REQUIRE(sort(std::get<1>(res), std::get<2>(res)) == + std::pair{0, 3}); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {3, 0} -> rev_wires = {3, 0} + const auto res = func(arr.data(), 4, {3, 0}, inverse, angle); + REQUIRE(std::get<0>(res) == + std::string("applyInternalExternal")); + REQUIRE(sort(std::get<1>(res), std::get<2>(res)) == + std::pair{0, 3}); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {2, 3} -> rev_wires = {0, 1} + const auto res = func(arr.data(), 4, {2, 3}, inverse, angle); + REQUIRE(std::get<0>(res) == + std::string("applyInternalInternal")); + REQUIRE(sort(std::get<1>(res), std::get<2>(res)) == + std::pair{0, 1}); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {3, 2} -> rev_wires = {1, 0} + const auto res = func(arr.data(), 4, {3, 2}, inverse, angle); + REQUIRE(std::get<0>(res) == + std::string("applyInternalInternal")); + 
REQUIRE(sort(std::get<1>(res), std::get<2>(res)) == + std::pair{0, 1}); + REQUIRE(std::get<3>(res) == inverse); + } + } + } + + SECTION("Test TwoQubitGateWithParamHelper for symmetric gates with " + "packed_size = 16") { + constexpr size_t packed_size = 16; + std::vector> arr( + 16, std::complex{0.0, 0.0}); + TwoQubitGateWithParamHelper< + MockSymmetricTwoQubitGateWithParam, TestType> + func(fallback); + + // We pack 16 real numbers -> 8 complex numbers -> three qubits + // rev_wire in {0, 1, 2} is internal + + for (bool inverse : {false, true}) { + { // num_qubits = 4, wires = {0, 1} -> rev_wires = {2, 3} + const auto res = func(arr.data(), 4, {0, 1}, inverse, angle); + REQUIRE(std::get<0>(res) == + std::string("applyInternalExternal")); + REQUIRE(sort(std::get<1>(res), std::get<2>(res)) == + std::pair{2, 3}); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {1, 0} -> rev_wires = {3, 2} + const auto res = func(arr.data(), 4, {1, 0}, inverse, angle); + REQUIRE(std::get<0>(res) == + std::string("applyInternalExternal")); + REQUIRE(sort(std::get<1>(res), std::get<2>(res)) == + std::pair{2, 3}); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {0, 3} -> rev_wires = {0, 3} + const auto res = func(arr.data(), 4, {0, 3}, inverse, angle); + REQUIRE(std::get<0>(res) == + std::string("applyInternalExternal")); + REQUIRE(sort(std::get<1>(res), std::get<2>(res)) == + std::pair{0, 3}); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {3, 0} -> rev_wires = {3, 0} + const auto res = func(arr.data(), 4, {3, 0}, inverse, angle); + REQUIRE(std::get<0>(res) == + std::string("applyInternalExternal")); + REQUIRE(sort(std::get<1>(res), std::get<2>(res)) == + std::pair{0, 3}); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {1, 3} -> rev_wires = {0, 2} + const auto res = func(arr.data(), 4, {1, 3}, inverse, angle); + REQUIRE(std::get<0>(res) == + 
std::string("applyInternalInternal")); + REQUIRE(sort(std::get<1>(res), std::get<2>(res)) == + std::pair{0, 2}); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {3, 1} -> rev_wires = {0, 2} + const auto res = func(arr.data(), 4, {3, 1}, inverse, angle); + REQUIRE(std::get<0>(res) == + std::string("applyInternalInternal")); + REQUIRE(sort(std::get<1>(res), std::get<2>(res)) == + std::pair{0, 2}); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 2, wires = {0, 1} -> fallback + const auto res = func(arr.data(), 2, {0, 1}, inverse, angle); + REQUIRE(std::get<0>(res) == std::string("fallback")); + REQUIRE(std::get<1>(res) == 1); + REQUIRE(std::get<2>(res) == 0); + REQUIRE(std::get<3>(res) == inverse); + } + } + } + + SECTION("Test TwoQubitGateWithParamHelper for asymmetric gates with " + "packed_size = 8") { + constexpr size_t packed_size = 8; + std::vector> arr( + 16, std::complex{0.0, 0.0}); + TwoQubitGateWithParamHelper< + MockAsymmetricTwoQubitGateWithParam, + TestType> + func(fallback); + + // We pack 8 real numbers -> 4 complex numbers -> two qubits + // rev_wire in {0, 1} is internal + // The second wire is the target wire + + for (bool inverse : {false, true}) { + { // num_qubits = 4, wires = {0, 1} -> rev_wires = {3, 2} + const auto res = func(arr.data(), 4, {0, 1}, inverse, angle); + REQUIRE(std::get<0>(res) == + std::string("applyExternalExternal")); + REQUIRE(std::get<1>(res) == 3); + REQUIRE(std::get<2>(res) == 2); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {1, 0} -> rev_wires = {2, 3} + const auto res = func(arr.data(), 4, {1, 0}, inverse, angle); + REQUIRE(std::get<0>(res) == + std::string("applyExternalExternal")); + REQUIRE(std::get<1>(res) == 2); + REQUIRE(std::get<2>(res) == 3); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {0, 3} -> rev_wires = {3, 0} + const auto res = func(arr.data(), 4, {0, 3}, inverse, angle); + REQUIRE(std::get<0>(res) == + 
std::string("applyExternalInternal")); + REQUIRE(std::get<1>(res) == 3); + REQUIRE(std::get<2>(res) == 0); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {3, 0} -> rev_wires = {0, 3} + const auto res = func(arr.data(), 4, {3, 0}, inverse, angle); + REQUIRE(std::get<0>(res) == + std::string("applyInternalExternal")); + REQUIRE(std::get<1>(res) == 0); + REQUIRE(std::get<2>(res) == 3); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {2, 3} -> rev_wires = {1, 0} + const auto res = func(arr.data(), 4, {2, 3}, inverse, angle); + REQUIRE(std::get<0>(res) == + std::string("applyInternalInternal")); + REQUIRE(std::get<1>(res) == 1); + REQUIRE(std::get<2>(res) == 0); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {3, 2} -> rev_wires = {0, 1} + const auto res = func(arr.data(), 4, {3, 2}, inverse, angle); + REQUIRE(std::get<0>(res) == + std::string("applyInternalInternal")); + REQUIRE(std::get<1>(res) == 0); + REQUIRE(std::get<2>(res) == 1); + REQUIRE(std::get<3>(res) == inverse); + } + } + } + + SECTION("Test TwoQubitGateWithParamHelper for asymmetric gates with " + "packed_size = 16") { + constexpr size_t packed_size = 16; + std::vector> arr( + 16, std::complex{0.0, 0.0}); + TwoQubitGateWithParamHelper< + MockAsymmetricTwoQubitGateWithParam, + TestType> + func(fallback); + + // We pack 16 real numbers -> 8 complex numbers -> three qubits + // rev_wire in {0, 1, 2} is internal + // The second wire is the target wire + + for (bool inverse : {false, true}) { + { // num_qubits = 4, wires = {0, 1} -> rev_wires = {3, 2} + const auto res = func(arr.data(), 4, {0, 1}, inverse, angle); + REQUIRE(std::get<0>(res) == + std::string("applyExternalInternal")); + REQUIRE(std::get<1>(res) == 3); + REQUIRE(std::get<2>(res) == 2); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {1, 0} -> rev_wires = {2, 3} + const auto res = func(arr.data(), 4, {1, 0}, inverse, angle); + 
REQUIRE(std::get<0>(res) == + std::string("applyInternalExternal")); + REQUIRE(std::get<1>(res) == 2); + REQUIRE(std::get<2>(res) == 3); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {0, 3} -> rev_wires = {3, 0} + const auto res = func(arr.data(), 4, {0, 3}, inverse, angle); + REQUIRE(std::get<0>(res) == + std::string("applyExternalInternal")); + REQUIRE(std::get<1>(res) == 3); + REQUIRE(std::get<2>(res) == 0); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {3, 0} -> rev_wires = {0, 3} + const auto res = func(arr.data(), 4, {3, 0}, inverse, angle); + REQUIRE(std::get<0>(res) == + std::string("applyInternalExternal")); + REQUIRE(std::get<1>(res) == 0); + REQUIRE(std::get<2>(res) == 3); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {2, 3} -> rev_wires = {1, 0} + const auto res = func(arr.data(), 4, {2, 3}, inverse, angle); + REQUIRE(std::get<0>(res) == + std::string("applyInternalInternal")); + REQUIRE(std::get<1>(res) == 1); + REQUIRE(std::get<2>(res) == 0); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 4, wires = {3, 2} -> rev_wires = {0, 1} + const auto res = func(arr.data(), 4, {3, 2}, inverse, angle); + REQUIRE(std::get<0>(res) == + std::string("applyInternalInternal")); + REQUIRE(std::get<1>(res) == 0); + REQUIRE(std::get<2>(res) == 1); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 2, wires = {0, 1} -> fallback + const auto res = func(arr.data(), 2, {0, 1}, inverse, angle); + REQUIRE(std::get<0>(res) == std::string("fallback")); + REQUIRE(std::get<1>(res) == 1); + REQUIRE(std::get<2>(res) == 0); + REQUIRE(std::get<3>(res) == inverse); + } + { // num_qubits = 2, wires = {1, 0} -> fallback + const auto res = func(arr.data(), 2, {1, 0}, inverse, angle); + REQUIRE(std::get<0>(res) == std::string("fallback")); + REQUIRE(std::get<1>(res) == 0); + REQUIRE(std::get<2>(res) == 1); + REQUIRE(std::get<3>(res) == inverse); + } + } + } } From 
d74f58fa0cfff5edeb10322a621aa6d4d11b7228 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Tue, 24 Jan 2023 17:17:21 -0500 Subject: [PATCH 07/22] Fix for tidy --- pennylane_lightning/src/tests/Test_AVXTwoQubitGateHelpers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pennylane_lightning/src/tests/Test_AVXTwoQubitGateHelpers.cpp b/pennylane_lightning/src/tests/Test_AVXTwoQubitGateHelpers.cpp index c9119a970a..71b91661ef 100644 --- a/pennylane_lightning/src/tests/Test_AVXTwoQubitGateHelpers.cpp +++ b/pennylane_lightning/src/tests/Test_AVXTwoQubitGateHelpers.cpp @@ -613,7 +613,7 @@ TEMPLATE_TEST_CASE("Test TwoQubitGateWithParamHelper", "[TwoQubitGateHelper]", num_qubits - wires[1] - 1, inverse}; }; - const TestType angle = static_cast(0.312); + const auto angle = static_cast(0.312); SECTION("Test TwoQubitGateWithParamHelper for symmetric gates with " "packed_size = 8") { From 88fee73a16b1124c3973d90cdb9fbdbc63fe8ba9 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Tue, 24 Jan 2023 18:27:00 -0500 Subject: [PATCH 08/22] Remove some internal classes from doc --- .../src/gates/cpu_kernels/avx_common/AVX2Concept.hpp | 2 ++ .../src/gates/cpu_kernels/avx_common/AVX512Concept.hpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVX2Concept.hpp b/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVX2Concept.hpp index 66ad8bb2b2..c202701498 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVX2Concept.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVX2Concept.hpp @@ -25,6 +25,7 @@ #include namespace Pennylane::Gates::AVXCommon { +///@cond DEV namespace Internal { template struct AVX2Intrinsic { static_assert(std::is_same_v || std::is_same_v); @@ -32,6 +33,7 @@ template struct AVX2Intrinsic { template <> struct AVX2Intrinsic { using Type = __m256; }; template <> struct AVX2Intrinsic { using Type = __m256d; }; } // namespace Internal +///@endcond template struct 
AVX2Concept { using PrecisionT = T; diff --git a/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVX512Concept.hpp b/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVX512Concept.hpp index d589a9c35c..e0459374d4 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVX512Concept.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVX512Concept.hpp @@ -26,6 +26,7 @@ #include namespace Pennylane::Gates::AVXCommon { +///@cond DEV namespace Internal { template struct AVX512Intrinsic { static_assert(std::is_same_v || std::is_same_v); @@ -33,6 +34,7 @@ template struct AVX512Intrinsic { template <> struct AVX512Intrinsic { using Type = __m512; }; template <> struct AVX512Intrinsic { using Type = __m512d; }; } // namespace Internal +///@endcond template struct AVX512Concept { using PrecisionT = T; From 522fc38613993734de7879c46a5ced83de45c700 Mon Sep 17 00:00:00 2001 From: Dev version update bot Date: Fri, 27 Jan 2023 17:05:10 +0000 Subject: [PATCH 09/22] Auto update version --- pennylane_lightning/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pennylane_lightning/_version.py b/pennylane_lightning/_version.py index 1b5c51eed3..c8a9133e54 100644 --- a/pennylane_lightning/_version.py +++ b/pennylane_lightning/_version.py @@ -16,4 +16,4 @@ Version number (major.minor.patch[-label]) """ -__version__ = "0.29.0-dev1" +__version__ = "0.29.0-dev2" From 16996fb198a64c0624cb4f4c1e8218ff06b75b45 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Sat, 28 Jan 2023 16:55:40 -0500 Subject: [PATCH 10/22] Apply suggestions from code review Co-authored-by: Amintor Dusko <87949283+AmintorDusko@users.noreply.github.com> --- .../src/gates/cpu_kernels/avx_common/AVXUtil.hpp | 4 ++-- .../src/gates/cpu_kernels/avx_common/ApplyCNOT.hpp | 4 ++-- .../src/gates/cpu_kernels/avx_common/ApplyPhaseShift.hpp | 4 ++-- .../src/gates/cpu_kernels/avx_common/README.md | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git 
a/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVXUtil.hpp b/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVXUtil.hpp index 27ee57fc63..752eff4c60 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVXUtil.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVXUtil.hpp @@ -298,7 +298,7 @@ constexpr __m512i setr512i(int64_t e0, int64_t e1, int64_t e2, int64_t e3, /** * @brief @rst - * For a function :math:`f(x)` with binary output, this function create + * For a function :math:`f(x)` with binary output, this function creates * an AVX intrinsic floating-point type with values :math:`(-1)^{f(x)}` * where :math:`x` is index of an array (viewed as a complex-valued array). * @endrst @@ -338,7 +338,7 @@ auto setValueOneTwo(Func &&func) -> AVXIntrinsicType { std::array data{}; for (size_t idx = 0; idx < packed_size / 2; idx++) { data[2 * idx + 0] = static_cast(func(idx)); - data[2 * idx + 1] = static_cast(func(idx)); + data[2 * idx + 1] = data[2 * idx + 0]; } return set(data); } diff --git a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyCNOT.hpp b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyCNOT.hpp index ab270c29f1..5a6ba08dc0 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyCNOT.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyCNOT.hpp @@ -39,7 +39,7 @@ template struct ApplyCNOT { constexpr static bool symmetric = false; template - static consteval auto applyInternalInternalPermuation() { + static consteval auto applyInternalInternalPermutation() { std::array perm{}; for (size_t k = 0; k < packed_size / 2; k++) { @@ -59,7 +59,7 @@ template struct ApplyCNOT { size_t num_qubits, [[maybe_unused]] bool inverse) { constexpr static auto perm = - applyInternalInternalPermuation(); + applyInternalInternalPermutation(); for (size_t n = 0; n < exp2(num_qubits); n += packed_size / 2) { const auto v = PrecisionAVXConcept::load(arr + n); diff --git 
a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyPhaseShift.hpp b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyPhaseShift.hpp index 771d1c5c82..95ec692e43 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyPhaseShift.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyPhaseShift.hpp @@ -67,7 +67,7 @@ template struct ApplyPhaseShift { arr[2 * n + 1] = 1.0; } else { arr[2 * n + 0] = std::cos(angle); - arr[2 * n + 1] = std::cos(angle); + arr[2 * n + 1] = arr[2 * n + 0]; } } return set(arr); @@ -85,7 +85,7 @@ template struct ApplyPhaseShift { arr[2 * n + 1] = 0.0; } else { arr[2 * n + 0] = -std::sin(angle); - arr[2 * n + 1] = std::sin(angle); + arr[2 * n + 1] = -arr[2 * n + 0]; } } return set(arr); diff --git a/pennylane_lightning/src/gates/cpu_kernels/avx_common/README.md b/pennylane_lightning/src/gates/cpu_kernels/avx_common/README.md index b7ccf9c734..cae6d844db 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/avx_common/README.md +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/README.md @@ -8,7 +8,7 @@ For two-qubit operations, we have `applyInternalInternal` (both wires act intern Most cases, we implement a gate operation by splitting it into permutations, multiplications, and summations. These operations are translated into intrinsics in the compile time using C++ template mechanism. -Permutations and factors for multiplications are often obtained from functions. Those functions are named by concatenating the function name it is called with `Permutation` or `Factor`. For example, `applyInternalInternalPermuation` returns a permutation that is required for `applyInternalInternal` function. +Permutations and factors for multiplications are often obtained from functions. Those functions are named by concatenating the function name it is called with `Permutation` or `Factor`. 
For example, `applyInternalInternalPermutation` returns a permutation that is required for `applyInternalInternal` function. See [the document](https://docs.pennylane.ai/projects/lightning/en/stable/avx_kernels/implementation.html) for details of the implementation. From d78366c6d61638eb55e5b66bc4836572e30b93f0 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Sat, 28 Jan 2023 16:56:09 -0500 Subject: [PATCH 11/22] Change set to overloaded functions --- .../gates/cpu_kernels/avx_common/AVXUtil.hpp | 69 ++++++++----------- .../avx_common/ApplyPhaseShift.hpp | 4 +- 2 files changed, 30 insertions(+), 43 deletions(-) diff --git a/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVXUtil.hpp b/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVXUtil.hpp index 27ee57fc63..9baf87dd44 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVXUtil.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVXUtil.hpp @@ -214,50 +214,37 @@ constexpr auto internal_wires_v = InternalWires::value; template struct Set; #ifdef PL_USE_AVX2 -template <> struct Set { - constexpr static auto create(const std::array &arr) - -> AVXIntrinsicType { - // NOLINTBEGIN(readability-magic-numbers) - return __m256{arr[0], arr[1], arr[2], arr[3], - arr[4], arr[5], arr[6], arr[7]}; - // NOLINTEND(readability-magic-numbers) - } -}; -template <> struct Set { - constexpr static auto create(const std::array &arr) - -> AVXIntrinsicType { - // NOLINTBEGIN(readability-magic-numbers) - return __m256d{arr[0], arr[1], arr[2], arr[3]}; - // NOLINTEND(readability-magic-numbers) - } -}; +constexpr static auto set(const std::array &arr) + -> AVXIntrinsicType { + // NOLINTBEGIN(readability-magic-numbers) + return __m256{arr[0], arr[1], arr[2], arr[3], + arr[4], arr[5], arr[6], arr[7]}; + // NOLINTEND(readability-magic-numbers) +} +constexpr static auto set(const std::array &arr) + -> AVXIntrinsicType { + // NOLINTBEGIN(readability-magic-numbers) + return __m256d{arr[0], arr[1], arr[2],
arr[3]}; + // NOLINTEND(readability-magic-numbers) +} #endif #ifdef PL_USE_AVX512F -template <> struct Set { - constexpr static auto create(const std::array &arr) - -> AVXIntrinsicType { - // NOLINTBEGIN(readability-magic-numbers) - return __m512{arr[0], arr[1], arr[2], arr[3], arr[4], arr[5], - arr[6], arr[7], arr[8], arr[9], arr[10], arr[11], - arr[12], arr[13], arr[14], arr[15]}; - // NOLINTEND(readability-magic-numbers) - } -}; -template <> struct Set { - constexpr static auto create(const std::array &arr) - -> AVXIntrinsicType { - // NOLINTBEGIN(readability-magic-numbers) - return __m512d{arr[0], arr[1], arr[2], arr[3], - arr[4], arr[5], arr[6], arr[7]}; - // NOLINTEND(readability-magic-numbers) - } -}; -#endif -template -constexpr auto set(const std::array &arr) - -> AVXIntrinsicType { - return Set::create(arr); +constexpr static auto set(const std::array &arr) + -> AVXIntrinsicType { + // NOLINTBEGIN(readability-magic-numbers) + return __m512{arr[0], arr[1], arr[2], arr[3], arr[4], arr[5], + arr[6], arr[7], arr[8], arr[9], arr[10], arr[11], + arr[12], arr[13], arr[14], arr[15]}; + // NOLINTEND(readability-magic-numbers) +} +constexpr static auto set(const std::array &arr) + -> AVXIntrinsicType { + // NOLINTBEGIN(readability-magic-numbers) + return __m512d{arr[0], arr[1], arr[2], arr[3], + arr[4], arr[5], arr[6], arr[7]}; + // NOLINTEND(readability-magic-numbers) } +#endif // clang-format off #ifdef PL_USE_AVX2 diff --git a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyPhaseShift.hpp b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyPhaseShift.hpp index 771d1c5c82..879df5d448 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyPhaseShift.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyPhaseShift.hpp @@ -70,7 +70,7 @@ template struct ApplyPhaseShift { arr[2 * n + 1] = std::cos(angle); } } - return set(arr); + return set(arr); } /** @@ -88,7 +88,7 @@ template struct ApplyPhaseShift { arr[2 * n + 1] = 
std::sin(angle); } } - return set(arr); + return set(arr); } template From fb8562d547aac4263dcd3976fc70221347dd32a0 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Sat, 28 Jan 2023 16:58:21 -0500 Subject: [PATCH 12/22] Format --- .../gates/cpu_kernels/avx_common/AVXUtil.hpp | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVXUtil.hpp b/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVXUtil.hpp index 8e9fd7691f..d3dddf3e25 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVXUtil.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVXUtil.hpp @@ -215,34 +215,34 @@ constexpr auto internal_wires_v = InternalWires::value; template struct Set; #ifdef PL_USE_AVX2 constexpr static auto set(const std::array &arr) - -> AVXIntrinsicType { - // NOLINTBEGIN(readability-magic-numbers) - return __m256{arr[0], arr[1], arr[2], arr[3], - arr[4], arr[5], arr[6], arr[7]}; - // NOLINTEND(readability-magic-numbers) + -> AVXIntrinsicType { + // NOLINTBEGIN(readability-magic-numbers) + return __m256{arr[0], arr[1], arr[2], arr[3], + arr[4], arr[5], arr[6], arr[7]}; + // NOLINTEND(readability-magic-numbers) } constexpr static auto set(const std::array &arr) - -> AVXIntrinsicType { - // NOLINTBEGIN(readability-magic-numbers) - return __m256d{arr[0], arr[1], arr[2], arr[3]}; - // NOLINTEND(readability-magic-numbers) + -> AVXIntrinsicType { + // NOLINTBEGIN(readability-magic-numbers) + return __m256d{arr[0], arr[1], arr[2], arr[3]}; + // NOLINTEND(readability-magic-numbers) } #endif #ifdef PL_USE_AVX512F constexpr static auto set(const std::array &arr) - -> AVXIntrinsicType { - // NOLINTBEGIN(readability-magic-numbers) - return __m512{arr[0], arr[1], arr[2], arr[3], arr[4], arr[5], - arr[6], arr[7], arr[8], arr[9], arr[10], arr[11], - arr[12], arr[13], arr[14], arr[15]}; - // NOLINTEND(readability-magic-numbers) + -> AVXIntrinsicType { + // 
NOLINTBEGIN(readability-magic-numbers) + return __m512{arr[0], arr[1], arr[2], arr[3], arr[4], arr[5], + arr[6], arr[7], arr[8], arr[9], arr[10], arr[11], + arr[12], arr[13], arr[14], arr[15]}; + // NOLINTEND(readability-magic-numbers) } constexpr static auto set(const std::array &arr) - -> AVXIntrinsicType { - // NOLINTBEGIN(readability-magic-numbers) - return __m512d{arr[0], arr[1], arr[2], arr[3], - arr[4], arr[5], arr[6], arr[7]}; - // NOLINTEND(readability-magic-numbers) + -> AVXIntrinsicType { + // NOLINTBEGIN(readability-magic-numbers) + return __m512d{arr[0], arr[1], arr[2], arr[3], + arr[4], arr[5], arr[6], arr[7]}; + // NOLINTEND(readability-magic-numbers) } #endif From 1778b71162d85c9dc29d7eee212ce34ab0d7c297 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Sat, 28 Jan 2023 17:13:11 -0500 Subject: [PATCH 13/22] Fix test name --- pennylane_lightning/src/tests/Test_TypeTraits.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pennylane_lightning/src/tests/Test_TypeTraits.cpp b/pennylane_lightning/src/tests/Test_TypeTraits.cpp index 47fcccae30..b498b6ead6 100644 --- a/pennylane_lightning/src/tests/Test_TypeTraits.cpp +++ b/pennylane_lightning/src/tests/Test_TypeTraits.cpp @@ -38,7 +38,7 @@ TEST_CASE("Test is_complex") { std::pair g(std::tuple); TEST_CASE("Test FuncReturn") { - SECTION("FuncReturn returns correctly returns the return type") { + SECTION("FuncReturn gives correct return types") { STATIC_REQUIRE( std::is_same_v::Type, std::pair>); // return type of g is From 4ec95f509fd5c74bec51192dff6aadf15e107467 Mon Sep 17 00:00:00 2001 From: Dev version update bot Date: Mon, 30 Jan 2023 20:19:14 +0000 Subject: [PATCH 14/22] Auto update version --- pennylane_lightning/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pennylane_lightning/_version.py b/pennylane_lightning/_version.py index c8a9133e54..a2c46e0f20 100644 --- a/pennylane_lightning/_version.py +++ b/pennylane_lightning/_version.py @@ -16,4 +16,4 @@ 
Version number (major.minor.patch[-label]) """ -__version__ = "0.29.0-dev2" +__version__ = "0.29.0-dev3" From 075891ca0c401a41d40373071e9cf3b7942c37e3 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Wed, 1 Feb 2023 09:40:04 -0500 Subject: [PATCH 15/22] Change function name from set to setValue --- .../src/gates/cpu_kernels/avx_common/AVXUtil.hpp | 13 ++++++------- .../cpu_kernels/avx_common/ApplyPhaseShift.hpp | 4 ++-- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVXUtil.hpp b/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVXUtil.hpp index d3dddf3e25..15f85f1ece 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVXUtil.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/AVXUtil.hpp @@ -212,16 +212,15 @@ template struct InternalWires { template constexpr auto internal_wires_v = InternalWires::value; -template struct Set; #ifdef PL_USE_AVX2 -constexpr static auto set(const std::array &arr) +constexpr static auto setValue(const std::array &arr) -> AVXIntrinsicType { // NOLINTBEGIN(readability-magic-numbers) return __m256{arr[0], arr[1], arr[2], arr[3], arr[4], arr[5], arr[6], arr[7]}; // NOLINTEND(readability-magic-numbers) } -constexpr static auto set(const std::array &arr) +constexpr static auto setValue(const std::array &arr) -> AVXIntrinsicType { // NOLINTBEGIN(readability-magic-numbers) return __m256d{arr[0], arr[1], arr[2], arr[3]}; @@ -229,7 +228,7 @@ constexpr static auto set(const std::array &arr) } #endif #ifdef PL_USE_AVX512F -constexpr static auto set(const std::array &arr) +constexpr static auto setValue(const std::array &arr) -> AVXIntrinsicType { // NOLINTBEGIN(readability-magic-numbers) return __m512{arr[0], arr[1], arr[2], arr[3], arr[4], arr[5], @@ -237,7 +236,7 @@ constexpr static auto set(const std::array &arr) arr[12], arr[13], arr[14], arr[15]}; // NOLINTEND(readability-magic-numbers) } -constexpr static auto set(const std::array &arr) +constexpr
static auto setValue(const std::array &arr) -> AVXIntrinsicType { // NOLINTBEGIN(readability-magic-numbers) return __m512d{arr[0], arr[1], arr[2], arr[3], @@ -311,7 +310,7 @@ auto toParity(Func &&func) -> AVXIntrinsicType { data[2 * idx + 1] = static_cast(1.0) - 2 * static_cast(func(idx)); } - return set(data); + return setValue(data); } /** @@ -327,6 +326,6 @@ auto setValueOneTwo(Func &&func) -> AVXIntrinsicType { data[2 * idx + 0] = static_cast(func(idx)); data[2 * idx + 1] = data[2 * idx + 0]; } - return set(data); + return setValue(data); } } // namespace Pennylane::Gates::AVXCommon diff --git a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyPhaseShift.hpp b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyPhaseShift.hpp index f5ad70196f..5638a682dd 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyPhaseShift.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/ApplyPhaseShift.hpp @@ -70,7 +70,7 @@ template struct ApplyPhaseShift { arr[2 * n + 1] = arr[2 * n + 0]; } } - return set(arr); + return setValue(arr); } /** @@ -88,7 +88,7 @@ template struct ApplyPhaseShift { arr[2 * n + 1] = -arr[2 * n + 0]; } } - return set(arr); + return setValue(arr); } template From eea6a305303af223306cc36f3edf02944e2226f5 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Wed, 1 Feb 2023 09:42:59 -0500 Subject: [PATCH 16/22] New black --- pennylane_lightning/_serialize.py | 1 - tests/test_decomposition.py | 1 - 2 files changed, 2 deletions(-) diff --git a/pennylane_lightning/_serialize.py b/pennylane_lightning/_serialize.py index 1e2359cb2f..0338b7c240 100644 --- a/pennylane_lightning/_serialize.py +++ b/pennylane_lightning/_serialize.py @@ -174,7 +174,6 @@ def _serialize_ops( op_list = [o] for single_op in op_list: - name = single_op.name names.append(name) diff --git a/tests/test_decomposition.py b/tests/test_decomposition.py index 9bd19ab016..3be39c5068 100644 --- a/tests/test_decomposition.py +++ b/tests/test_decomposition.py 
@@ -38,7 +38,6 @@ class TestDenseMatrixDecompositionThreshold: @pytest.mark.parametrize("op, n_wires, condition", input) def test_threshold(self, op, n_wires, condition): - wires = np.linspace(0, n_wires - 1, n_wires, dtype=int) op = op(wires=wires) assert LightningQubit.stopping_condition.__get__(op)(op) == condition From 385f87f0457a5a7893506394181ca64da3f303f6 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Wed, 1 Feb 2023 13:23:45 -0500 Subject: [PATCH 17/22] Apply suggestions from code review Co-authored-by: Lee James O'Riordan --- .../src/gates/cpu_kernels/avx_common/README.md | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pennylane_lightning/src/gates/cpu_kernels/avx_common/README.md b/pennylane_lightning/src/gates/cpu_kernels/avx_common/README.md index cae6d844db..ca67544a8d 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/avx_common/README.md +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/README.md @@ -2,13 +2,11 @@ Each gate operation is implemented in a class with a corresponding name. For example, SWAP operation is implemented in `ApplySwap` class defined in [ApplySwap.cpp](ApplySwap.cpp) file. -Depending on the wires gates apply to, we use two (for single-qubit operations), three (for symmetric two-qubit operators), and four (for non-symmetric two-qubit operators) functions to implement each gate. -For single-qubit operations, functions `applyInternal` corresponds to intra-register gate operations and `applyExternal` corresponds to inter-register gate operations. +Depending on the wires the gates apply to, we use two (for single-qubit operations), three (for symmetric two-qubit operators), and four (for non-symmetric two-qubit operators) functions to implement each gate. +For single-qubit operations, the functions named `applyInternal` correspond to intra-register gate operations and those named `applyExternal` correspond to inter-register gate operations. 
For two-qubit operations, we have `applyInternalInternal` (both wires act internally), `applyInternalExternal` (control wire acts internally whereas target wire acts externally), `applyExternalInternal` (target wire acts internally whereas control wire acts externally), and `applyExternalExternal` (both wires act externally). - -Most cases, we implement a gate operation by splitting it into permutations, multiplications, and summations. These operations are translated into intrinsics in the compile time using C++ template mechanism. -Permutations and factors for multiplications are often obtained from functions. Those functions are named by concatenating the function name it is called with `Permutation` or `Factor`. For example, `applyInternalInternalPermutation` returns a permutation that is required for `applyInternalInternal` function. - +In most cases, we implement a gate operation by splitting it into permutations, multiplications, and summations. These operations are translated into intrinsics at compile time using the C++ template mechanism. +Permutations and factors for multiplications are often obtained from functions. Those functions are named by concatenating the function name with `Permutation` or `Factor`. For example, `applyInternalInternalPermutation` returns a permutation that is required for an `applyInternalInternal` function. See [the document](https://docs.pennylane.ai/projects/lightning/en/stable/avx_kernels/implementation.html) for details of the implementation. 
From 69819e327ed02d66cc18a4dfe2c0b118081b9f45 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Wed, 1 Feb 2023 13:24:04 -0500 Subject: [PATCH 18/22] Apply suggestions from code review Co-authored-by: Lee James O'Riordan --- .../src/gates/cpu_kernels/avx_common/SingleQubitGateHelper.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pennylane_lightning/src/gates/cpu_kernels/avx_common/SingleQubitGateHelper.hpp b/pennylane_lightning/src/gates/cpu_kernels/avx_common/SingleQubitGateHelper.hpp index d1501e8c30..1d76544f43 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/avx_common/SingleQubitGateHelper.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/avx_common/SingleQubitGateHelper.hpp @@ -19,7 +19,7 @@ * Depending on the wire the gate applies to, one needs to call * ``applyInternal`` or ``applyExternal`` in classes implementing AVX2/512 * gates. As ``applyInternal`` takes ``wire`` as a template parameters, we - * instantiates this function for all possible ``wire`` and call the correct one + * instantiate this function for all possible ``wire`` and call the correct one * in runtime. 
*/ #pragma once From 85df70db31d2747f936980307ced5bc9be21cadc Mon Sep 17 00:00:00 2001 From: Dev version update bot Date: Wed, 1 Feb 2023 22:41:32 +0000 Subject: [PATCH 19/22] Auto update version --- pennylane_lightning/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pennylane_lightning/_version.py b/pennylane_lightning/_version.py index 10477be179..6052145646 100644 --- a/pennylane_lightning/_version.py +++ b/pennylane_lightning/_version.py @@ -16,4 +16,4 @@ Version number (major.minor.patch[-label]) """ -__version__ = "0.29.0-dev4" +__version__ = "0.29.0-dev5" From 160d7c65ff52a93a1faca05688e8fb512576f443 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Wed, 1 Feb 2023 17:43:15 -0500 Subject: [PATCH 20/22] Enable Dispatcher in C++ only build --- pennylane_lightning/src/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/pennylane_lightning/src/CMakeLists.txt b/pennylane_lightning/src/CMakeLists.txt index ec031ea515..8fe123465a 100644 --- a/pennylane_lightning/src/CMakeLists.txt +++ b/pennylane_lightning/src/CMakeLists.txt @@ -6,6 +6,7 @@ set(CMAKE_CXX_STANDARD 20) # At least C++20 is required option(ENABLE_WARNINGS "Enable warnings" ON) option(ENABLE_OPENMP "Enable OpenMP" ON) +option(ENABLE_GATE_DISPATCHER "Enable gate kernel dispatching on AVX/AVX2/AVX512" ON) if(ENABLE_CLANG_TIDY) if(NOT DEFINED CLANG_TIDY_BINARY) From 3ef4589f4d6c1d29771afe9e73965678d8b40cfe Mon Sep 17 00:00:00 2001 From: Dev version update bot Date: Fri, 3 Feb 2023 14:39:45 +0000 Subject: [PATCH 21/22] Auto update version --- pennylane_lightning/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pennylane_lightning/_version.py b/pennylane_lightning/_version.py index 6052145646..4784f68305 100644 --- a/pennylane_lightning/_version.py +++ b/pennylane_lightning/_version.py @@ -16,4 +16,4 @@ Version number (major.minor.patch[-label]) """ -__version__ = "0.29.0-dev5" +__version__ = "0.29.0-dev6" From 
8ac1bd99bbba0fe40230ce9d0d0df5435a3b5948 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Fri, 3 Feb 2023 12:40:43 -0500 Subject: [PATCH 22/22] Update changelog --- .github/CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/CHANGELOG.md b/.github/CHANGELOG.md index e1e6dc30bd..ccc70041dd 100644 --- a/.github/CHANGELOG.md +++ b/.github/CHANGELOG.md @@ -9,6 +9,9 @@ * Allow better integration and installation support with CMake targeted binary builds. [(#403)](https://github.com/PennyLaneAI/pennylane-lightning/pull/403) +* Update AVX2/512 kernel infrastructure for additional gate/generator operations. +[(#404)](https://github.com/PennyLaneAI/pennylane-lightning/pull/404) + ### Documentation ### Bug fixes