PennyLaneAI
diff --git a/‎.github/CHANGELOG.md
+3 b/‎.github/CHANGELOG.md
+3
diff --git a/‎pennylane_lightning/_version.py
+1-1 b/‎pennylane_lightning/_version.py
+1-1
diff --git a/‎pennylane_lightning/src/CMakeLists.txt
+1 b/‎pennylane_lightning/src/CMakeLists.txt
+1
diff --git a/‎pennylane_lightning/src/gates/cpu_kernels/GateImplementationsAVXCommon.hpp
+18-44 b/‎pennylane_lightning/src/gates/cpu_kernels/GateImplementationsAVXCommon.hpp
+18-44
diff --git a/‎pennylane_lightning/src/gates/cpu_kernels/avx_common/AVX2Concept.hpp
+2-3 b/‎pennylane_lightning/src/gates/cpu_kernels/avx_common/AVX2Concept.hpp
+2-3
diff --git a/‎pennylane_lightning/src/gates/cpu_kernels/avx_common/AVX512Concept.hpp
+2-4 b/‎pennylane_lightning/src/gates/cpu_kernels/avx_common/AVX512Concept.hpp
+2-4
diff --git a/‎pennylane_lightning/src/gates/cpu_kernels/avx_common/AVXConceptType.hpp
+49 b/‎pennylane_lightning/src/gates/cpu_kernels/avx_common/AVXConceptType.hpp
+49
diff --git a/‎pennylane_lightning/src/gates/cpu_kernels/avx_common/AVXGateKernels.hpp
+35 b/‎pennylane_lightning/src/gates/cpu_kernels/avx_common/AVXGateKernels.hpp
+35
@@ -12,6 +12,9 @@
 * Remove explicit Numpy and Scipy requirements.
 [(#412)](https://github.com/PennyLaneAI/pennylane-lightning/pull/412)
 
+* Update AVX2/512 kernel infrastructure for additional gate/generator operations.
+[(#404)](https://github.com/PennyLaneAI/pennylane-lightning/pull/404)
+
 ### Documentation
 
 ### Bug fixes
 
@@ -16,4 +16,4 @@
    Version number (major.minor.patch[-label])
 """
 
-__version__ = "0.29.0-dev5"
+__version__ = "0.29.0-dev6"
@@ -6,6 +6,7 @@ set(CMAKE_CXX_STANDARD 20) # At least C++20 is required
 
 option(ENABLE_WARNINGS "Enable warnings" ON)
 option(ENABLE_OPENMP "Enable OpenMP" ON)
+option(ENABLE_GATE_DISPATCHER "Enable gate kernel dispatching on AVX/AVX2/AVX512" ON)
 
 if(ENABLE_CLANG_TIDY)
     if(NOT DEFINED CLANG_TIDY_BINARY)
 
@@ -16,33 +16,7 @@
  * Defines kernel functions for all AVX
  */
 #pragma once
-
-// General implementations
-#include "Macros.hpp"
-
-#ifdef PL_USE_AVX2
-#include "avx_common/AVX2Concept.hpp"
-#endif
-#ifdef PL_USE_AVX512F
-#include "avx_common/AVX512Concept.hpp"
-#endif
-#include "avx_common/ApplyCNOT.hpp"
-#include "avx_common/ApplyCZ.hpp"
-#include "avx_common/ApplyHadamard.hpp"
-#include "avx_common/ApplyIsingXX.hpp"
-#include "avx_common/ApplyIsingYY.hpp"
-#include "avx_common/ApplyIsingZZ.hpp"
-#include "avx_common/ApplyPauliX.hpp"
-#include "avx_common/ApplyPauliY.hpp"
-#include "avx_common/ApplyPauliZ.hpp"
-#include "avx_common/ApplyPhaseShift.hpp"
-#include "avx_common/ApplyRX.hpp"
-#include "avx_common/ApplyRY.hpp"
-#include "avx_common/ApplyRZ.hpp"
-#include "avx_common/ApplyS.hpp"
-#include "avx_common/ApplySWAP.hpp"
-#include "avx_common/ApplySingleQubitOp.hpp"
-#include "avx_common/ApplyT.hpp"
+#include "avx_common/AVXGateKernels.hpp"
 #include "avx_common/SingleQubitGateHelper.hpp"
 #include "avx_common/TwoQubitGateHelper.hpp"
 
@@ -91,7 +65,7 @@ class GateImplementationsAVXCommon
                           std::is_same_v<PrecisionT, double>,
                       "Only float and double are supported.");
 
-        assert(wires.size() == 1);
+        PL_ASSERT(wires.size() == 1);
         auto helper =
             AVXCommon::SingleQubitGateWithoutParamHelper<ApplyPauliXAVX>(
                 &GateImplementationsLM::applyPauliX);
@@ -110,7 +84,7 @@ class GateImplementationsAVXCommon
                           std::is_same_v<PrecisionT, double>,
                       "Only float and double are supported.");
 
-        assert(wires.size() == 1);
+        PL_ASSERT(wires.size() == 1);
         auto helper =
             AVXCommon::SingleQubitGateWithoutParamHelper<ApplyPauliYAVX>(
                 &GateImplementationsLM::applyPauliY);
@@ -129,7 +103,7 @@ class GateImplementationsAVXCommon
                           std::is_same_v<PrecisionT, double>,
                       "Only float and double are supported.");
 
-        assert(wires.size() == 1);
+        PL_ASSERT(wires.size() == 1);
         auto helper =
             AVXCommon::SingleQubitGateWithoutParamHelper<ApplyPauliZAVX>(
                 &GateImplementationsLM::applyPauliZ);
@@ -161,7 +135,7 @@ class GateImplementationsAVXCommon
                           std::is_same_v<PrecisionT, double>,
                       "Only float and double are supported.");
 
-        assert(wires.size() == 1);
+        PL_ASSERT(wires.size() == 1);
         auto helper = AVXCommon::SingleQubitGateWithoutParamHelper<ApplyTAVX>(
             &GateImplementationsLM::applyT);
         helper(arr, num_qubits, wires, inverse);
@@ -179,7 +153,7 @@ class GateImplementationsAVXCommon
                           std::is_same_v<PrecisionT, double>,
                       "Only float and double are supported.");
 
-        assert(wires.size() == 1);
+        PL_ASSERT(wires.size() == 1);
         auto helper =
             AVXCommon::SingleQubitGateWithParamHelper<ApplyPhaseShiftAVX,
                                                       ParamT>(
@@ -198,7 +172,7 @@ class GateImplementationsAVXCommon
         static_assert(std::is_same_v<PrecisionT, float> ||
                           std::is_same_v<PrecisionT, double>,
                       "Only float and double are supported.");
-        assert(wires.size() == 1);
+        PL_ASSERT(wires.size() == 1);
         auto helper =
             AVXCommon::SingleQubitGateWithoutParamHelper<ApplyHadamardAVX>(
                 &GateImplementationsLM::applyHadamard);
@@ -215,7 +189,7 @@ class GateImplementationsAVXCommon
         static_assert(std::is_same_v<PrecisionT, float> ||
                           std::is_same_v<PrecisionT, double>,
                       "Only float and double are supported.");
-        assert(wires.size() == 1);
+        PL_ASSERT(wires.size() == 1);
         auto helper =
             AVXCommon::SingleQubitGateWithParamHelper<ApplyRXAVX, ParamT>(
                 &GateImplementationsLM::applyRX);
@@ -232,7 +206,7 @@ class GateImplementationsAVXCommon
         static_assert(std::is_same_v<PrecisionT, float> ||
                           std::is_same_v<PrecisionT, double>,
                       "Only float and double are supported.");
-        assert(wires.size() == 1);
+        PL_ASSERT(wires.size() == 1);
         auto helper =
             AVXCommon::SingleQubitGateWithParamHelper<ApplyRYAVX, ParamT>(
                 &GateImplementationsLM::applyRY);
@@ -249,7 +223,7 @@ class GateImplementationsAVXCommon
         static_assert(std::is_same_v<PrecisionT, float> ||
                           std::is_same_v<PrecisionT, double>,
                       "Only float and double are supported.");
-        assert(wires.size() == 1);
+        PL_ASSERT(wires.size() == 1);
         auto helper =
             AVXCommon::SingleQubitGateWithParamHelper<ApplyRZAVX, ParamT>(
                 &GateImplementationsLM::applyRZ);
@@ -260,7 +234,7 @@ class GateImplementationsAVXCommon
     static void applyRot(std::complex<PrecisionT> *arr, const size_t num_qubits,
                          const std::vector<size_t> &wires, bool inverse,
                          ParamT phi, ParamT theta, ParamT omega) {
-        assert(wires.size() == 1);
+        PL_ASSERT(wires.size() == 1);
 
         const auto rotMat =
             (inverse) ? Gates::getRot<PrecisionT>(-omega, -theta, -phi)
@@ -282,7 +256,7 @@ class GateImplementationsAVXCommon
                           std::is_same_v<PrecisionT, double>,
                       "Only float and double are supported.");
 
-        assert(wires.size() == 2);
+        PL_ASSERT(wires.size() == 2);
 
         const AVXCommon::TwoQubitGateWithoutParamHelper<ApplyCZAVX> gate_helper(
             &GateImplementationsLM::applyCZ<PrecisionT>);
@@ -302,7 +276,7 @@ class GateImplementationsAVXCommon
                           std::is_same_v<PrecisionT, double>,
                       "Only float and double are supported.");
 
-        assert(wires.size() == 2);
+        PL_ASSERT(wires.size() == 2);
 
         const AVXCommon::TwoQubitGateWithoutParamHelper<ApplySWAPAVX>
             gate_helper(&GateImplementationsLM::applySWAP<PrecisionT>);
@@ -314,7 +288,7 @@ class GateImplementationsAVXCommon
     static void
     applyCNOT(std::complex<PrecisionT> *arr, const size_t num_qubits,
               const std::vector<size_t> &wires, [[maybe_unused]] bool inverse) {
-        assert(wires.size() == 2);
+        PL_ASSERT(wires.size() == 2);
 
         using ApplyCNOTAVX =
             AVXCommon::ApplyCNOT<PrecisionT,
@@ -327,7 +301,7 @@ class GateImplementationsAVXCommon
                           std::is_same_v<PrecisionT, double>,
                       "Only float and double are supported.");
 
-        assert(wires.size() == 2);
+        PL_ASSERT(wires.size() == 2);
 
         const AVXCommon::TwoQubitGateWithoutParamHelper<ApplyCNOTAVX>
             gate_helper(&GateImplementationsLM::applyCNOT<PrecisionT>);
@@ -340,7 +314,7 @@ class GateImplementationsAVXCommon
                              const size_t num_qubits,
                              const std::vector<size_t> &wires,
                              [[maybe_unused]] bool inverse, ParamT angle) {
-        assert(wires.size() == 2);
+        PL_ASSERT(wires.size() == 2);
 
         using ApplyIsingXXAVX =
             AVXCommon::ApplyIsingXX<PrecisionT,
@@ -362,7 +336,7 @@ class GateImplementationsAVXCommon
                              const size_t num_qubits,
                              const std::vector<size_t> &wires,
                              [[maybe_unused]] bool inverse, ParamT angle) {
-        assert(wires.size() == 2);
+        PL_ASSERT(wires.size() == 2);
 
         using ApplyIsingYYAVX =
             AVXCommon::ApplyIsingYY<PrecisionT,
@@ -392,7 +366,7 @@ class GateImplementationsAVXCommon
                           std::is_same_v<PrecisionT, double>,
                       "Only float and double are supported.");
 
-        assert(wires.size() == 2);
+        PL_ASSERT(wires.size() == 2);
 
         const AVXCommon::TwoQubitGateWithParamHelper<ApplyIsingZZAVX, ParamT>
             gate_helper(
 
@@ -18,21 +18,22 @@
 #pragma once
 #include "AVXUtil.hpp"
 #include "BitUtil.hpp"
-#include "Macros.hpp"
 #include "Util.hpp"
 
 #include <immintrin.h>
 
 #include <type_traits>
 
 namespace Pennylane::Gates::AVXCommon {
+///@cond DEV
 namespace Internal {
 template <typename T> struct AVX2Intrinsic {
     static_assert(std::is_same_v<T, float> || std::is_same_v<T, double>);
 };
 template <> struct AVX2Intrinsic<float> { using Type = __m256; };
 template <> struct AVX2Intrinsic<double> { using Type = __m256d; };
 } // namespace Internal
+///@endcond
 
 template <typename T> struct AVX2Concept {
     using PrecisionT = T;
@@ -110,6 +111,4 @@ template <typename T> struct AVX2Concept {
         }
     }
 };
-template <> struct AVXConcept<float, 8> { using Type = AVX2Concept<float>; };
-template <> struct AVXConcept<double, 4> { using Type = AVX2Concept<double>; };
 } // namespace Pennylane::Gates::AVXCommon
@@ -26,13 +26,15 @@
 #include <type_traits>
 
 namespace Pennylane::Gates::AVXCommon {
+///@cond DEV
 namespace Internal {
 template <typename T> struct AVX512Intrinsic {
     static_assert(std::is_same_v<T, float> || std::is_same_v<T, double>);
 };
 template <> struct AVX512Intrinsic<float> { using Type = __m512; };
 template <> struct AVX512Intrinsic<double> { using Type = __m512d; };
 } // namespace Internal
+///@endcond
 
 template <typename T> struct AVX512Concept {
     using PrecisionT = T;
@@ -111,8 +113,4 @@ template <typename T> struct AVX512Concept {
     }
 };
 
-template <> struct AVXConcept<float, 16> { using Type = AVX512Concept<float>; };
-template <> struct AVXConcept<double, 8> {
-    using Type = AVX512Concept<double>;
-};
 } // namespace Pennylane::Gates::AVXCommon
@@ -0,0 +1,49 @@
+// Copyright 2023 Xanadu Quantum Technologies Inc.
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+//     http://www.apache.org/licenses/LICENSE-2.0
+
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+/**
+ * @file
+ * Maps (precision, packed size) pairs to the AVX2/AVX-512 concept types
+ */
+#pragma once
+
+#include "Macros.hpp"
+
+#ifdef PL_USE_AVX2
+#include "AVX2Concept.hpp"
+#endif
+
+#ifdef PL_USE_AVX512F
+#include "AVX512Concept.hpp"
+#endif
+
+namespace Pennylane::Gates::AVXCommon {
+// Trait selecting a concept class per (PrecisionT, packed_size); declared only.
+template <class PrecisionT, size_t packed_size> struct AVXConcept;
+// NOTE(review): size_t is unqualified and this header has no direct <cstddef> include; confirm.
+#ifdef PL_USE_AVX2 // compiled only when PL_USE_AVX2 is defined
+template <> struct AVXConcept<float, 8> { using Type = AVX2Concept<float>; };
+template <> struct AVXConcept<double, 4> { using Type = AVX2Concept<double>; };
+#endif // PL_USE_AVX2
+// NOTE(review): packed_size is presumably the element count of one vector; confirm.
+#ifdef PL_USE_AVX512F // compiled only when PL_USE_AVX512F is defined
+template <> struct AVXConcept<float, 16> { using Type = AVX512Concept<float>; };
+template <> struct AVXConcept<double, 8> {
+    using Type = AVX512Concept<double>;
+};
+#endif // PL_USE_AVX512F
+// Shorthand for AVXConcept<PrecisionT, packed_size>::Type.
+template <class PrecisionT, size_t packed_size>
+using AVXConceptType = typename AVXConcept<PrecisionT, packed_size>::Type;
+
+} // namespace Pennylane::Gates::AVXCommon
@@ -0,0 +1,35 @@
+// Copyright 2023 Xanadu Quantum Technologies Inc.
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+//     http://www.apache.org/licenses/LICENSE-2.0
+
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+/**
+ * @file
+ * Aggregate header that includes all AVX gate kernel implementations
+ */
+#pragma once
+#include "ApplyCNOT.hpp"
+#include "ApplyCZ.hpp"
+#include "ApplyHadamard.hpp"
+#include "ApplyIsingXX.hpp"
+#include "ApplyIsingYY.hpp"
+#include "ApplyIsingZZ.hpp"
+#include "ApplyPauliX.hpp"
+#include "ApplyPauliY.hpp"
+#include "ApplyPauliZ.hpp"
+#include "ApplyPhaseShift.hpp"
+#include "ApplyRX.hpp"
+#include "ApplyRY.hpp"
+#include "ApplyRZ.hpp"
+#include "ApplyS.hpp"
+#include "ApplySWAP.hpp"
+#include "ApplySingleQubitOp.hpp"
+#include "ApplyT.hpp"