From 9a6722c643057965d7aa911d6779199781acfcbb Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Sat, 26 Feb 2022 22:45:36 -0500 Subject: [PATCH 01/94] Managed to CPU --- Makefile | 2 +- pennylane_lightning/src/.clang-tidy | 6 +- .../src/algorithms/AdjointDiff.hpp | 52 +-- .../src/examples/CMakeLists.txt | 11 +- .../src/examples/benchmark_gate_list.cpp | 223 ++++++++++ .../src/examples/benchmark_multi_rz.cpp | 4 +- .../src/examples/benchmark_operation.cpp | 210 +++++++++ .../src/examples/gate_benchmark.cpp | 2 - .../src/examples/run_gate_benchmark.sh | 2 +- .../src/gates/AvailableKernels.hpp | 8 +- pennylane_lightning/src/gates/Gates.hpp | 2 +- .../GateImplementationsLM.hpp | 194 +++++++-- .../GateImplementationsPI.hpp | 84 +++- .../{ => cpu_kernels}/PauliGenerator.hpp | 0 .../DefaultKernelsForStateVector.hpp | 400 ++++++++++++++++++ .../src/simulator/DispatchKeys.hpp | 87 ++++ .../src/simulator/DynamicDispatcher.cpp | 8 +- .../src/simulator/DynamicDispatcher.hpp | 147 ++----- .../src/simulator/Measures.hpp | 10 +- .../src/simulator/StateVectorBase.hpp | 48 ++- .../src/simulator/StateVectorCPU.hpp | 166 ++++++++ .../src/simulator/StateVectorManaged.hpp | 104 ----- pennylane_lightning/src/tests/.clang-tidy | 4 +- pennylane_lightning/src/tests/CMakeLists.txt | 7 +- .../src/tests/CreateAllWires.cpp | 31 ++ .../src/tests/CreateAllWires.hpp | 92 ++++ pennylane_lightning/src/tests/TestHelpers.hpp | 77 ++-- pennylane_lightning/src/tests/TestKernels.hpp | 12 +- .../src/tests/Test_AdjDiff.cpp | 37 +- .../Test_DefaultKernelsForStateVector.cpp | 32 ++ .../src/tests/Test_DynamicDispatcher.cpp | 8 +- ...est_GateImplementations_CompareKernels.cpp | 185 ++++++++ .../Test_GateImplementations_Generator.cpp | 2 +- .../Test_GateImplementations_Inverse.cpp | 2 +- .../Test_GateImplementations_Nonparam.cpp | 117 ++--- .../tests/Test_GateImplementations_Param.cpp | 18 +- .../src/tests/Test_Internal.cpp | 83 +++- .../src/tests/Test_Measures.cpp | 22 +- 
.../src/tests/Test_StateVectorCPU.cpp | 48 +++ .../src/tests/Test_StateVectorManaged.cpp | 50 --- pennylane_lightning/src/tests/Test_Util.cpp | 15 + pennylane_lightning/src/util/BitUtil.hpp | 27 +- .../src/util/LinearAlgebra.hpp | 29 +- pennylane_lightning/src/util/Macros.hpp | 70 ++- pennylane_lightning/src/util/Memory.hpp | 106 +++++ pennylane_lightning/src/util/TypeList.hpp | 34 +- pennylane_lightning/src/util/Util.hpp | 35 ++ 47 files changed, 2381 insertions(+), 532 deletions(-) create mode 100644 pennylane_lightning/src/examples/benchmark_gate_list.cpp create mode 100644 pennylane_lightning/src/examples/benchmark_operation.cpp rename pennylane_lightning/src/gates/{ => cpu_kernels}/GateImplementationsLM.hpp (87%) rename pennylane_lightning/src/gates/{ => cpu_kernels}/GateImplementationsPI.hpp (91%) rename pennylane_lightning/src/gates/{ => cpu_kernels}/PauliGenerator.hpp (100%) create mode 100644 pennylane_lightning/src/simulator/DefaultKernelsForStateVector.hpp create mode 100644 pennylane_lightning/src/simulator/DispatchKeys.hpp create mode 100644 pennylane_lightning/src/simulator/StateVectorCPU.hpp delete mode 100644 pennylane_lightning/src/simulator/StateVectorManaged.hpp create mode 100644 pennylane_lightning/src/tests/CreateAllWires.cpp create mode 100644 pennylane_lightning/src/tests/CreateAllWires.hpp create mode 100644 pennylane_lightning/src/tests/Test_DefaultKernelsForStateVector.cpp create mode 100644 pennylane_lightning/src/tests/Test_GateImplementations_CompareKernels.cpp create mode 100644 pennylane_lightning/src/tests/Test_StateVectorCPU.cpp delete mode 100644 pennylane_lightning/src/tests/Test_StateVectorManaged.cpp create mode 100644 pennylane_lightning/src/util/Memory.hpp diff --git a/Makefile b/Makefile index 02556dc3e0..edef79bab5 100644 --- a/Makefile +++ b/Makefile @@ -75,7 +75,7 @@ coverage: test-cpp: rm -rf ./BuildTests - cmake $(LIGHTNING_CPP_DIR) -BBuildTests -DBUILD_TESTS=ON + cmake $(LIGHTNING_CPP_DIR) -BBuildTests -DBUILD_TESTS=ON 
-DENABLE_OPENMP=OFF cmake --build ./BuildTests --target runner cmake --build ./BuildTests --target test diff --git a/pennylane_lightning/src/.clang-tidy b/pennylane_lightning/src/.clang-tidy index f015b16a1d..e1fce11707 100644 --- a/pennylane_lightning/src/.clang-tidy +++ b/pennylane_lightning/src/.clang-tidy @@ -1,5 +1,5 @@ --- -Checks: 'clang-diagnostic-*,clang-analyzer-*,-*,-llvmlibc-*,modernize-*,-modernize-use-trailing-return-type,clang-analyzer-cplusplus*,openmp-*,performance-*,portability-*,readability-*,hicpp-*,-hicpp-no-array-decay,bugprone-suspicious-*,llvm-namespace-comment,' +Checks: 'clang-diagnostic-*,clang-analyzer-*,-*,-llvmlibc-*,modernize-*,-modernize-use-trailing-return-type,clang-analyzer-cplusplus*,openmp-*,performance-*,portability-*,readability-*,hicpp-*,-hicpp-avoid-c-arrays,-hicpp-no-array-decay,bugprone-suspicious-*,llvm-namespace-comment,cppcoreguidelines-avoid-non-const-global-variables,cppcoreguidelines-slicing,cppcoreguidelines-special-member-functions' WarningsAsErrors: '*' HeaderFilterRegex: '.*' AnalyzeTemporaryDtors: false @@ -25,8 +25,6 @@ CheckOptions: value: 'false' - key: readability-magic-numbers.IgnoredIntegerValues value: '1;2;3;4;' - - key: readability-magic-numbers.IgnorePowersOf2IntegerValues - value: true - key: modernize-use-default-member-init.UseAssignment value: 'false' - key: readability-function-size.NestingThreshold @@ -218,7 +216,7 @@ CheckOptions: - key: modernize-use-auto.RemoveStars value: 'false' - key: readability-magic-numbers.IgnorePowersOf2IntegerValues - value: 'false' + value: 'true' - key: portability-simd-intrinsics.Std value: '' - key: readability-redundant-member-init.IgnoreBaseInCopyConstructors diff --git a/pennylane_lightning/src/algorithms/AdjointDiff.hpp b/pennylane_lightning/src/algorithms/AdjointDiff.hpp index 1d84d139b6..9b69139260 100644 --- a/pennylane_lightning/src/algorithms/AdjointDiff.hpp +++ b/pennylane_lightning/src/algorithms/AdjointDiff.hpp @@ -22,11 +22,12 @@ #include #include 
+#include "DispatchKeys.hpp" #include "DynamicDispatcher.hpp" #include "Error.hpp" #include "JacobianTape.hpp" #include "LinearAlgebra.hpp" -#include "StateVectorManaged.hpp" +#include "StateVectorCPU.hpp" #include @@ -48,7 +49,7 @@ namespace Pennylane::Algorithms { */ template class AdjointJacobian { private: - using GeneratorFunc = void (*)(StateVectorManaged &, + using GeneratorFunc = void (*)(StateVectorCPU &, const std::vector &, const bool); // function pointer type @@ -63,25 +64,26 @@ template class AdjointJacobian { * @param obs_index Observable index position of Jacobian to update. * @param param_index Parameter index position of Jacobian to update. */ - inline void updateJacobian(const StateVectorManaged &sv1, - const StateVectorManaged &sv2, + inline void updateJacobian(const StateVectorCPU &sv1, + const StateVectorCPU &sv2, std::vector> &jac, T scaling_coeff, size_t obs_index, size_t param_index) { jac[obs_index][param_index] = -2 * scaling_coeff * - std::imag(innerProdC(sv1.getDataVector(), sv2.getDataVector())); + std::imag( + innerProdC(sv1.getData(), sv2.getData(), sv1.getLength())); } /** * @brief Utility method to apply all operations from given `%OpsData` - * object to `%StateVectorManaged` + * object to `%StateVectorCPU` * * @param state Statevector to be updated. * @param operations Operations to apply. * @param adj Take the adjoint of the given operations. */ - inline void applyOperations(StateVectorManaged &state, + inline void applyOperations(StateVectorCPU &state, const OpsData &operations, bool adj = false) { for (size_t op_idx = 0; op_idx < operations.getOpsName().size(); @@ -94,13 +96,13 @@ template class AdjointJacobian { } /** * @brief Utility method to apply the adjoint indexed operation from - * `%OpsData` object to `%StateVectorManaged`. + * `%OpsData` object to `%StateVectorCPU`. * * @param state Statevector to be updated. * @param operations Operations to apply. * @param op_idx Adjointed operation index to apply. 
*/ - inline void applyOperationAdj(StateVectorManaged &state, + inline void applyOperationAdj(StateVectorCPU &state, const OpsData &operations, size_t op_idx) { state.applyOperation(operations.getOpsName()[op_idx], operations.getOpsWires()[op_idx], @@ -110,12 +112,12 @@ template class AdjointJacobian { /** * @brief Utility method to apply a given operations from given - * `%ObsDatum` object to `%StateVectorManaged` + * `%ObsDatum` object to `%StateVectorCPU` * * @param state Statevector to be updated. * @param observable Observable to apply. */ - inline void applyObservable(StateVectorManaged &state, + inline void applyObservable(StateVectorCPU &state, const ObsDatum &observable) { using namespace Pennylane::Util; for (size_t j = 0; j < observable.getSize(); j++) { @@ -157,8 +159,8 @@ template class AdjointJacobian { * @param reference_state Reference statevector * @param observables Vector of observables to apply to each statevector. */ - inline void applyObservables(std::vector> &states, - const StateVectorManaged &reference_state, + inline void applyObservables(std::vector> &states, + const StateVectorCPU &reference_state, const std::vector> &observables) { // clang-format off // Globally scoped exception value to be captured within OpenMP block. @@ -174,7 +176,7 @@ template class AdjointJacobian { #endif for (size_t h_i = 0; h_i < num_observables; h_i++) { try { - states[h_i].updateData(reference_state.getDataVector()); + states[h_i].updateData(reference_state.getData()); applyObservable(states[h_i], observables[h_i]); } catch (...) { #if defined(_OPENMP) @@ -207,7 +209,7 @@ template class AdjointJacobian { * @param op_idx Index of given operation within operations list to take * adjoint of. */ - inline void applyOperationsAdj(std::vector> &states, + inline void applyOperationsAdj(std::vector> &states, const OpsData &operations, size_t op_idx) { // clang-format off @@ -298,7 +300,7 @@ template class AdjointJacobian { * of parametric gates. 
* * For the statevector data associated with `psi` of length `num_elements`, - * we make internal copies to a `%StateVectorManaged` object, with one + * we make internal copies to a `%StateVectorCPU` object, with one * per required observable. The `operations` will be applied to the internal * statevector copies, with the operation indices participating in the * gradient calculations given in `trainableParams`, and the overall number @@ -333,7 +335,7 @@ template class AdjointJacobian { num_param_ops - 1; // total number of parametric ops // Create $U_{1:p}\vert \lambda \rangle$ - StateVectorManaged lambda(jd.getPtrStateVec(), jd.getSizeStateVec()); + StateVectorCPU lambda(jd.getPtrStateVec(), jd.getSizeStateVec()); // Apply given operations to statevector if requested if (apply_operations) { @@ -343,12 +345,14 @@ template class AdjointJacobian { const auto tp_begin = tp.begin(); auto tp_it = tp.end(); + StateVectorCPU sv{lambda.getNumQubits(), Threading::SingleThread}; // Create observable-applied state-vectors - std::vector> H_lambda( - num_observables, StateVectorManaged{lambda.getNumQubits()}); + std::vector> H_lambda( + num_observables, + StateVectorCPU{lambda.getNumQubits(), Threading::SingleThread}); applyObservables(H_lambda, lambda, obs); - StateVectorManaged mu(lambda.getNumQubits()); + StateVectorCPU mu(lambda.getNumQubits()); for (int op_idx = static_cast(ops_name.size() - 1); op_idx >= 0; op_idx--) { @@ -357,7 +361,7 @@ template class AdjointJacobian { "differentiation method"); if ((ops_name[op_idx] != "QubitStateVector") && (ops_name[op_idx] != "BasisState")) { - mu.updateData(lambda.getDataVector()); + mu.updateData(lambda.getData()); applyOperationAdj(lambda, ops, op_idx); if (ops.hasParams(op_idx)) { @@ -387,9 +391,9 @@ template class AdjointJacobian { obs_idx++) { jac[mat_row_idx + obs_idx] = -2 * scalingFactor * - std::imag(innerProdC( - H_lambda[obs_idx].getDataVector(), - mu.getDataVector())); + std::imag( + 
innerProdC(H_lambda[obs_idx].getData(), + mu.getData(), mu.getLength())); } trainableParamNumber--; std::advance(tp_it, -1); diff --git a/pennylane_lightning/src/examples/CMakeLists.txt b/pennylane_lightning/src/examples/CMakeLists.txt index 21bbe56c63..d58bcce5ba 100644 --- a/pennylane_lightning/src/examples/CMakeLists.txt +++ b/pennylane_lightning/src/examples/CMakeLists.txt @@ -21,14 +21,19 @@ target_link_libraries(lightning_examples INTERFACE lightning_compile_options lightning_simulator lightning_utils) -add_executable(gate_benchmark_oplist gate_benchmark_oplist.cpp) -target_link_libraries(gate_benchmark_oplist PRIVATE lightning_examples) +add_executable(benchmark_operation benchmark_operation.cpp) +target_link_libraries(benchmark_operation PRIVATE lightning_examples) + +add_executable(benchmark_operation_float benchmark_operation.cpp) +target_compile_options(benchmark_operation_float PRIVATE "-DUSE_SINGLE_PRECISION") +target_link_libraries(benchmark_operation_float PRIVATE lightning_examples) + add_executable(benchmark_multi_rz benchmark_multi_rz.cpp) target_link_libraries(benchmark_multi_rz PRIVATE lightning_examples) configure_file("compiler_info.in" "compiler_info.txt") -add_custom_command(TARGET gate_benchmark_oplist POST_BUILD +add_custom_command(TARGET benchmark_operation POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${PROJECT_SOURCE_DIR}/run_gate_benchmark.sh ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/run_gate_benchmark.sh diff --git a/pennylane_lightning/src/examples/benchmark_gate_list.cpp b/pennylane_lightning/src/examples/benchmark_gate_list.cpp new file mode 100644 index 0000000000..5910ad0884 --- /dev/null +++ b/pennylane_lightning/src/examples/benchmark_gate_list.cpp @@ -0,0 +1,223 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "Constant.hpp" +#include "ExampleUtil.hpp" +#include "StateVectorManaged.hpp" + +using namespace Pennylane; +using namespace Pennylane::Gates; +using namespace Pennylane::Util; + 
+std::string_view strip(std::string_view str) { + auto start = str.find_first_not_of(" \t"); + auto end = str.find_last_not_of(" \t"); + return str.substr(start, end - start + 1); +} + +struct GateDesc { + size_t n_wires; // number of wires the gate applies to + size_t n_params; // number of parameters the gate requires +}; + +std::vector> +parseGateLists(std::string_view arg) { + namespace Constant = Gates::Constant; + std::map available_gates_wires; + + for (const auto &[gate_op, gate_name] : Constant::gate_names) { + if (!array_has_elt(Constant::multi_qubit_gates, gate_op)) { + // We do not support multi qubit gates yet + size_t n_wires = Util::lookup(Constant::gate_wires, gate_op); + size_t n_params = Util::lookup(Constant::gate_num_params, gate_op); + available_gates_wires.emplace(gate_name, + GateDesc{n_wires, n_params}); + } + } + + if (arg.empty()) { + return {}; + } + + std::vector> ops; + + if (auto pos = arg.find_first_of('['); pos != std::string_view::npos) { + // arg is a list "[...]" + auto start = pos + 1; + auto end = arg.find_last_of(']'); + if (end == std::string_view::npos) { + throw std::invalid_argument( + "Argument must contain operators within square brackets []."); + } + arg = arg.substr(start, end - start); + } + + size_t start; + size_t end = 0; + while ((start = arg.find_first_not_of(',', end)) != std::string::npos) { + end = arg.find(',', start); + auto op_name = strip(arg.substr(start, end - start)); + + auto iter = available_gates_wires.find(std::string(op_name)); + + if (iter == available_gates_wires.end()) { + std::ostringstream ss; + ss << "Given gate " << op_name + << " is not availabe"; // TODO: Change to std::format in C++20 + throw std::invalid_argument(ss.str()); + } + ops.emplace_back(*iter); + } + return ops; +} + +/** + * @brief Benchmark Pennylane-Lightning for a given gate set + * + * Example usage: + * + * $ gate_benchmark_oplist 10 22 # Benchmark using 10 random gates (sampled + * evenly from all possible gates) for 22 
qubits + * $ gate_benchmark_oplist 100 20 [PauliX, CNOT] # Benchmark using 100 + * random gates (where each gate is PauliX or CNOT) for 20 qubits + * + * The whole supported gates are PauliX, PauliY, PauliZ, Hadamard, S, T, RX, RY, + * RZ, Rot, PhaseShift, CNOT, SWAP, ControlledPhaseShift, CRX, CRY, CRZ, CRot, + * Toffoli and CSWAP. + * + * @param argc Number of arguments + * @param argv Command line arguments + * @return Returns 0 is completed successfully + */ +int main(int argc, char *argv[]) { + using TestType = double; + + // Handle input + if (argc < 4) { + std::cerr << "Wrong number of inputs. User provided " << argc - 1 + << " inputs. " + << "Usage: " + std::string(argv[0]) + + " num_gate_reps num_qubits kernel [gate_lists]\n" + "\tExample: " + << argv[0] << " 1000 10 PI [PauliX, CNOT]" + << std::endl; // Change to std::format in C++20 + return -1; + } + + size_t num_gate_reps; + size_t num_qubits; + + try { + num_gate_reps = std::stoi(argv[1]); + num_qubits = std::stoi(argv[2]); + } catch (std::exception &e) { + std::cerr << "Arguments num_gate_reps and num_qubits must be integers." + << std::endl; + return -1; + } + + std::string_view kernel_name = argv[3]; + KernelType kernel = string_to_kernel(kernel_name); + if (kernel == KernelType::None) { + std::cerr << "Kernel " << kernel_name << " is unknown." << std::endl; + return 1; + } + + // Gate list is provided + std::string op_list_s; + { + std::ostringstream ss; + for (int idx = 4; idx < argc; idx++) { + ss << argv[idx] << " "; + } + op_list_s = ss.str(); + } + + std::vector> op_list; + try { + op_list = parseGateLists(op_list_s); + } catch (std::exception &e) { + std::cerr << e.what() << std::endl; + return 1; + } + + if (op_list.empty()) { + std::cerr << "Please provide a gate list." 
<< std::endl; + return 1; + } + + // Generate random gate sequences + std::random_device rd; + std::mt19937 re(rd()); + + std::vector random_gate_names; + std::vector> random_gate_wires; + std::vector random_inverses; + std::vector> random_gate_parameters; + + std::uniform_int_distribution gate_dist(0, op_list.size() - 1); + std::uniform_int_distribution inverse_dist(0, 1); + std::uniform_real_distribution param_dist(0.0, 2 * M_PI); + std::uniform_int_distribution wire_dist(0, num_qubits - 1); + + auto gen_param = [¶m_dist, &re]() { return param_dist(re); }; + + for (uint32_t k = 0; k < num_gate_reps; k++) { + const auto &[op_name, gate_desc] = op_list[gate_dist(re)]; + + std::vector gate_params(gate_desc.n_params, 0.0); + std::generate(gate_params.begin(), gate_params.end(), gen_param); + + random_gate_names.emplace_back(op_name); + random_inverses.emplace_back(static_cast(inverse_dist(re))); + // random_gate_wires.emplace_back(generateDistinctWires(re, num_qubits, + // gate_desc.n_wires)); + random_gate_wires.emplace_back( + generateNeighboringWires(re, num_qubits, gate_desc.n_wires)); + random_gate_parameters.emplace_back(std::move(gate_params)); + } + + // Log generated sequence if LOG is turned on + const char *env_p = std::getenv("LOG"); + try { + if (env_p != nullptr && std::stoi(env_p) != 0) { + for (size_t gate_rep = 0; gate_rep < num_gate_reps; gate_rep++) { + std::cerr << random_gate_names[gate_rep] << ", " + << random_gate_wires[gate_rep] << ", " + << random_gate_parameters[gate_rep] << std::endl; + } + } + } catch (std::exception &e) { + // Just do not print log + } + + // Run benchmark. Total num_gate_reps number of gates is used. 
+ Pennylane::StateVectorManaged svdat{num_qubits}; + std::chrono::time_point t_start; + std::chrono::time_point t_end; + t_start = std::chrono::high_resolution_clock::now(); + + for (size_t gate_rep = 0; gate_rep < num_gate_reps; gate_rep++) { + svdat.applyOperation(kernel, std::string(random_gate_names[gate_rep]), + random_gate_wires[gate_rep], + random_inverses[gate_rep], + random_gate_parameters[gate_rep]); + } + + t_end = std::chrono::high_resolution_clock::now(); + + // Output walltime in csv format (Num Qubits, Time (milliseconds)) + const auto walltime = + 0.001 * ((std::chrono::duration_cast( + t_end - t_start)) + .count()); + std::cout << num_qubits << ", " + << walltime / static_cast(num_gate_reps) << std::endl; + return 0; +} diff --git a/pennylane_lightning/src/examples/benchmark_multi_rz.cpp b/pennylane_lightning/src/examples/benchmark_multi_rz.cpp index 180e93ba9a..49bac2ead2 100644 --- a/pennylane_lightning/src/examples/benchmark_multi_rz.cpp +++ b/pennylane_lightning/src/examples/benchmark_multi_rz.cpp @@ -1,5 +1,5 @@ #include "ExampleUtil.hpp" -#include "StateVectorManaged.hpp" +#include "StateVectorCPU.hpp" #include #include @@ -54,7 +54,7 @@ int main(int argc, char *argv[]) { params.emplace_back(param_dist(re)); } - StateVectorManaged sv{num_qubits}; + StateVectorCPU sv{num_qubits}; std::chrono::time_point t_start = std::chrono::high_resolution_clock::now(); diff --git a/pennylane_lightning/src/examples/benchmark_operation.cpp b/pennylane_lightning/src/examples/benchmark_operation.cpp new file mode 100644 index 0000000000..0978a90550 --- /dev/null +++ b/pennylane_lightning/src/examples/benchmark_operation.cpp @@ -0,0 +1,210 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "Constant.hpp" +#include "ExampleUtil.hpp" +#include "StateVectorCPU.hpp" + +#ifdef USE_SINGLE_PRECISION +using PrecisionT = float; +#pragma message "Using single precision" +#else +using PrecisionT = double; +#endif + +using 
namespace Pennylane; +using namespace Pennylane::Gates; +using namespace Pennylane::Util; + +std::string_view strip(std::string_view str) { + auto start = str.find_first_not_of(" \t"); + auto end = str.find_last_not_of(" \t"); + return str.substr(start, end - start + 1); +} + +template +double benchmark_gate(RandomEngine &re, KernelType kernel, + const std::string &gate_name, const size_t num_reps, + const size_t num_qubits) { + const GateOperation gate_op = Util::lookup( + Util::reverse_pairs(Constant::gate_names), std::string_view(gate_name)); + const size_t num_wires = Util::lookup(Constant::gate_wires, gate_op); + const size_t num_params = Util::lookup(Constant::gate_num_params, gate_op); + + // Generate random generator sequences + std::vector> random_wires; + std::vector random_inverses; + std::vector> random_params; + random_wires.reserve(num_reps); + random_inverses.reserve(num_reps); + random_params.reserve(num_reps); + + std::uniform_int_distribution inverse_dist(0, 1); + std::uniform_real_distribution param_dist(0.0, 2 * M_PI); + + for (uint32_t k = 0; k < num_reps; k++) { + std::vector gate_params; + gate_params.reserve(num_params); + + random_inverses.emplace_back(static_cast(inverse_dist(re))); + random_wires.emplace_back( + generateNeighboringWires(re, num_qubits, num_wires)); + + for (size_t idx = 0; idx < num_params; idx++) { + gate_params.emplace_back(param_dist(re)); + } + random_params.emplace_back(std::move(gate_params)); + } + + // Log generated sequence if LOG is turned on + const char *env_p = std::getenv("LOG"); + try { + if (env_p != nullptr && std::stoi(env_p) != 0) { + for (size_t gate_rep = 0; gate_rep < num_reps; gate_rep++) { + std::cerr << gate_name << ", " << random_wires[gate_rep] << "," + << random_inverses[gate_rep] << "," + << random_params[gate_rep] << std::endl; + } + } + } catch (std::exception &e) { + // Just do not print log + } + + // Run benchmark. Total num_reps number of gates is used. 
+ StateVectorCPU svdat{num_qubits}; + + std::chrono::time_point t_start = + std::chrono::high_resolution_clock::now(); + for (size_t gate_rep = 0; gate_rep < num_reps; gate_rep++) { + svdat.applyOperation(kernel, gate_name, random_wires[gate_rep], + random_inverses[gate_rep], + random_params[gate_rep]); + } + std::chrono::time_point t_end = + std::chrono::high_resolution_clock::now(); + + return std::chrono::duration(t_end - t_start).count(); +} + +template +double benchmark_generator(RandomEngine &re, KernelType kernel, + const std::string &gntr_name, const size_t num_reps, + const size_t num_qubits) { + const auto gntr_name_without_prefix = gntr_name.substr(9); + const GeneratorOperation gntr_op = + Util::lookup(Util::reverse_pairs(Constant::generator_names), + std::string_view(gntr_name)); + const size_t num_wires = Util::lookup(Constant::generator_wires, gntr_op); + + // Generate random generator sequences + std::vector> random_wires; + std::vector random_inverses; + random_wires.reserve(num_reps); + random_inverses.reserve(num_reps); + + std::uniform_int_distribution inverse_dist(0, 1); + + for (uint32_t k = 0; k < num_reps; k++) { + random_inverses.emplace_back(static_cast(inverse_dist(re))); + random_wires.emplace_back( + generateNeighboringWires(re, num_qubits, num_wires)); + } + + // Log generated sequence if LOG is turned on + const char *env_p = std::getenv("LOG"); + try { + if (env_p != nullptr && std::stoi(env_p) != 0) { + for (size_t gate_rep = 0; gate_rep < num_reps; gate_rep++) { + std::cerr << gntr_name << ", " << random_wires[gate_rep] << "," + << random_inverses[gate_rep] << std::endl; + } + } + } catch (std::exception &e) { + // Just do not print log + } + + // Run benchmark. Total num_reps number of gates is used. 
+ StateVectorCPU svdat{num_qubits}; + + std::chrono::time_point t_start = + std::chrono::high_resolution_clock::now(); + for (size_t gate_rep = 0; gate_rep < num_reps; gate_rep++) { + [[maybe_unused]] auto scale = svdat.applyGenerator( + kernel, gntr_name_without_prefix, random_wires[gate_rep], + random_inverses[gate_rep]); + } + std::chrono::time_point t_end = + std::chrono::high_resolution_clock::now(); + + return std::chrono::duration(t_end - t_start).count(); +} + +/** + * @brief Benchmark Pennylane-Lightning for a given generator + * + * @param argc Number of arguments + * @param argv Command line arguments + * @return Returns 0 is completed successfully + */ +int main(int argc, char *argv[]) { + // Handle input + if (argc < 5) { // NOLINT(readability-magic-numbers) + std::cerr << "Wrong number of inputs. User provided " << argc - 1 + << " inputs. \n" + << "Usage: " + std::string(argv[0]) + + " num_reps num_qubits kernel [generator|gate]\n" + "Examples: \n" + "\t" + << argv[0] << " 1000 10 PI GeneratorCRX\n" + << "\t" << argv[0] << " 1000 10 LM CRX" + << std::endl; // Change to std::format in C++20 + return -1; + } + + size_t num_reps; + size_t num_qubits; + + try { + num_reps = std::stoi(argv[1]); + num_qubits = std::stoi(argv[2]); + } catch (std::exception &e) { + std::cerr << "Arguments num_reps and num_qubits must be integers." + << std::endl; + return -1; + } + + std::string_view kernel_name = argv[3]; + KernelType kernel = string_to_kernel(kernel_name); + if (kernel == KernelType::None) { + std::cerr << "Kernel " << kernel_name << " is unknown." 
<< std::endl; + return 1; + } + + const std::string_view gate_or_gntr_name = argv[4]; + const std::string_view generator_prefix = "Generator"; + + std::random_device rd; + std::mt19937 re(rd()); + + double walltime; + + if (gate_or_gntr_name.substr(0, generator_prefix.length()) == + generator_prefix) { // generators + walltime = benchmark_generator( + re, kernel, std::string(gate_or_gntr_name), num_reps, num_qubits); + } else { + walltime = benchmark_gate(re, kernel, std::string(gate_or_gntr_name), + num_reps, num_qubits); + } + + // Output walltime in csv format (Num Qubits, Time (milliseconds)) + std::cout << num_qubits << ", " << walltime / static_cast(num_reps) + << std::endl; + return 0; +} diff --git a/pennylane_lightning/src/examples/gate_benchmark.cpp b/pennylane_lightning/src/examples/gate_benchmark.cpp index a8c8745a25..0f2a12d21c 100644 --- a/pennylane_lightning/src/examples/gate_benchmark.cpp +++ b/pennylane_lightning/src/examples/gate_benchmark.cpp @@ -6,7 +6,6 @@ #include #include -#include "IndicesUtil.hpp" #include "StateVectorManaged.hpp" /** @@ -18,7 +17,6 @@ */ int main(int argc, char *argv[]) { using TestType = double; - namespace IndicesUtil = Pennylane::IndicesUtil; // Handle input try { diff --git a/pennylane_lightning/src/examples/run_gate_benchmark.sh b/pennylane_lightning/src/examples/run_gate_benchmark.sh index 3e310e0f88..315c3ebdda 100755 --- a/pennylane_lightning/src/examples/run_gate_benchmark.sh +++ b/pennylane_lightning/src/examples/run_gate_benchmark.sh @@ -19,7 +19,7 @@ compiler_info=$(; +using AvailableKernels = Util::TypeList; } // namespace Pennylane diff --git a/pennylane_lightning/src/gates/Gates.hpp b/pennylane_lightning/src/gates/Gates.hpp index a9141fec48..d12598ccc4 100644 --- a/pennylane_lightning/src/gates/Gates.hpp +++ b/pennylane_lightning/src/gates/Gates.hpp @@ -330,7 +330,7 @@ e^{-i(\phi-\omega)/2}\sin(\theta/2) & e^{i(\phi+\omega)/2}\cos(\theta/2) * @return const std::vector> Return const Rot gate data. 
*/ template -static auto getRot(U phi, U theta, U omega) -> std::vector> { +static auto getRot(U phi, U theta, U omega) -> std::array, 4> { using namespace Util; const T c = std::cos(theta / 2); const T s = std::sin(theta / 2); diff --git a/pennylane_lightning/src/gates/GateImplementationsLM.hpp b/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsLM.hpp similarity index 87% rename from pennylane_lightning/src/gates/GateImplementationsLM.hpp rename to pennylane_lightning/src/gates/cpu_kernels/GateImplementationsLM.hpp index 9f227862b2..6e0060fc54 100644 --- a/pennylane_lightning/src/gates/GateImplementationsLM.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsLM.hpp @@ -16,6 +16,7 @@ * Defines kernel functions with less memory (and fast) */ #pragma once +#include "PauliGenerator.hpp" #include "BitUtil.hpp" #include "Error.hpp" @@ -23,7 +24,6 @@ #include "Gates.hpp" #include "KernelType.hpp" #include "LinearAlgebra.hpp" -#include "PauliGenerator.hpp" #include #include @@ -38,10 +38,20 @@ namespace Pennylane::Gates { * @tparam PrecisionT Floating point precision of underlying statevector data */ class GateImplementationsLM : public PauliGenerator { + private: + /* Alias utility functions */ + static constexpr auto fillLeadingOnes = Util::fillLeadingOnes; + static constexpr auto fillTrailingOnes = Util::fillTrailingOnes; + static constexpr auto bitswap = Util::bitswap; + public: constexpr static KernelType kernel_id = KernelType::LM; constexpr static std::string_view name = "LM"; - constexpr static uint32_t data_alignment_in_bytes = 1; + template + constexpr static size_t required_alignment = + std::alignment_of_v; + template + constexpr static size_t packed_bytes = sizeof(PrecisionT); constexpr static std::array implemented_gates = { GateOperation::PauliX, GateOperation::PauliY, @@ -53,25 +63,26 @@ class GateImplementationsLM : public PauliGenerator { GateOperation::CZ, GateOperation::CNOT, GateOperation::SWAP, 
GateOperation::ControlledPhaseShift, GateOperation::CRX, GateOperation::CRY, - GateOperation::CRZ, GateOperation::IsingXX, - GateOperation::IsingYY, GateOperation::IsingZZ, - GateOperation::MultiRZ, GateOperation::Matrix}; + GateOperation::CRZ, GateOperation::CRot, + GateOperation::IsingXX, GateOperation::IsingYY, + GateOperation::IsingZZ, GateOperation::MultiRZ, + GateOperation::Matrix}; constexpr static std::array implemented_generators = { - GeneratorOperation::RX, GeneratorOperation::RY, - GeneratorOperation::RZ, GeneratorOperation::PhaseShift, - GeneratorOperation::CRX, GeneratorOperation::CRY, - GeneratorOperation::CRZ, GeneratorOperation::IsingXX, - GeneratorOperation::IsingYY, GeneratorOperation::IsingZZ, + GeneratorOperation::RX, + GeneratorOperation::RY, + GeneratorOperation::RZ, + GeneratorOperation::PhaseShift, + GeneratorOperation::CRX, + GeneratorOperation::CRY, + GeneratorOperation::CRZ, + GeneratorOperation::IsingXX, + GeneratorOperation::IsingYY, + GeneratorOperation::IsingZZ, + GeneratorOperation::ControlledPhaseShift, GeneratorOperation::MultiRZ, }; - private: - /* Alias utility functions */ - static constexpr auto fillLeadingOnes = Util::fillLeadingOnes; - static constexpr auto fillTrailingOnes = Util::fillTrailingOnes; - static constexpr auto bitswap = Util::bitswap; - /** * @brief Apply a single qubit gate to the statevector. 
* @@ -230,7 +241,6 @@ class GateImplementationsLM : public PauliGenerator { } } - public: template static void applyMatrix(std::complex *arr, size_t num_qubits, const std::complex *matrix, @@ -257,8 +267,8 @@ class GateImplementationsLM : public PauliGenerator { size_t idx = k | inner_idx; size_t n_wires = wires.size(); for (size_t pos = 0; pos < n_wires; pos++) { - bitswap(idx, n_wires - pos - 1, - num_qubits - wires[pos] - 1); + idx = bitswap(idx, n_wires - pos - 1, + num_qubits - wires[pos] - 1); } indices[inner_idx] = idx; coeffs_in[inner_idx] = arr[idx]; @@ -281,9 +291,8 @@ class GateImplementationsLM : public PauliGenerator { const size_t num_qubits, const std::vector &wires, [[maybe_unused]] bool inverse) { - using Util::fillLeadingOnes, Util::fillTrailingOnes; - assert(wires.size() == 1); + const size_t rev_wire = num_qubits - wires[0] - 1; const size_t rev_wire_shift = (static_cast(1U) << rev_wire); const size_t wire_parity = fillTrailingOnes(rev_wire); @@ -341,10 +350,20 @@ class GateImplementationsLM : public PauliGenerator { const std::vector &wires, [[maybe_unused]] bool inverse) { assert(wires.size() == 1); - constexpr auto isqrt2 = Util::INVSQRT2(); - constexpr static std::array, 4> hadamardMat = { - isqrt2, isqrt2, isqrt2, -isqrt2}; - applySingleQubitOp(arr, num_qubits, hadamardMat.data(), wires[0]); + constexpr static auto isqrt2 = Util::INVSQRT2(); + const size_t rev_wire = num_qubits - wires[0] - 1; + const size_t rev_wire_shift = (static_cast(1U) << rev_wire); + const size_t wire_parity = fillTrailingOnes(rev_wire); + const size_t wire_parity_inv = fillLeadingOnes(rev_wire + 1); + + for (size_t k = 0; k < Util::exp2(num_qubits - 1); k++) { + const size_t i0 = ((k << 1U) & wire_parity_inv) | (wire_parity & k); + const size_t i1 = i0 | rev_wire_shift; + const std::complex v0 = arr[i0]; + const std::complex v1 = arr[i1]; + arr[i0] = isqrt2 * v0 + isqrt2 * v1; + arr[i1] = isqrt2 * v0 - isqrt2 * v1; + } } template @@ -377,11 +396,10 @@ class 
GateImplementationsLM : public PauliGenerator { const size_t wire_parity = fillTrailingOnes(rev_wire); const size_t wire_parity_inv = fillLeadingOnes(rev_wire + 1); - const std::complex shift = - (inverse) ? std::conj(std::exp(std::complex( - 0, static_cast(M_PI / 4)))) - : std::exp(std::complex( - 0, static_cast(M_PI / 4))); + constexpr static auto isqrt2 = Util::INVSQRT2(); + + const std::complex shift = {isqrt2, + inverse ? -isqrt2 : isqrt2}; for (size_t k = 0; k < Util::exp2(num_qubits - 1); k++) { const size_t i0 = ((k << 1U) & wire_parity_inv) | (wire_parity & k); @@ -395,8 +413,6 @@ class GateImplementationsLM : public PauliGenerator { const size_t num_qubits, const std::vector &wires, bool inverse, ParamT angle) { - using Util::fillLeadingOnes, Util::fillTrailingOnes; - assert(wires.size() == 1); const size_t rev_wire = num_qubits - wires[0] - 1; const size_t rev_wire_shift = (static_cast(1U) << rev_wire); @@ -419,14 +435,25 @@ class GateImplementationsLM : public PauliGenerator { const std::vector &wires, bool inverse, ParamT angle) { assert(wires.size() == 1); + const size_t rev_wire = num_qubits - wires[0] - 1; + const size_t rev_wire_shift = (static_cast(1U) << rev_wire); + const size_t wire_parity = fillTrailingOnes(rev_wire); + const size_t wire_parity_inv = fillLeadingOnes(rev_wire + 1); const PrecisionT c = std::cos(angle / 2); const PrecisionT js = (inverse) ? 
-std::sin(-angle / 2) : std::sin(-angle / 2); - const std::array, 4> RXMat = { - c, Util::IMAG() * js, Util::IMAG() * js, c}; - applySingleQubitOp(arr, num_qubits, RXMat.data(), wires[0]); + for (size_t k = 0; k < Util::exp2(num_qubits - 1); k++) { + const size_t i0 = ((k << 1U) & wire_parity_inv) | (wire_parity & k); + const size_t i1 = i0 | rev_wire_shift; + const std::complex v0 = arr[i0]; + const std::complex v1 = arr[i1]; + arr[i0] = c * v0 + + std::complex{-imag(v1) * js, real(v1) * js}; + arr[i1] = std::complex{-imag(v0) * js, real(v0) * js} + + c * v1; + } } template @@ -434,13 +461,25 @@ class GateImplementationsLM : public PauliGenerator { const std::vector &wires, bool inverse, ParamT angle) { assert(wires.size() == 1); + const size_t rev_wire = num_qubits - wires[0] - 1; + const size_t rev_wire_shift = (static_cast(1U) << rev_wire); + const size_t wire_parity = fillTrailingOnes(rev_wire); + const size_t wire_parity_inv = fillLeadingOnes(rev_wire + 1); const PrecisionT c = std::cos(angle / 2); const PrecisionT s = (inverse) ? 
-std::sin(angle / 2) : std::sin(angle / 2); - const std::array, 4> RYMat = {c, -s, s, c}; - applySingleQubitOp(arr, num_qubits, RYMat.data(), wires[0]); + for (size_t k = 0; k < Util::exp2(num_qubits - 1); k++) { + const size_t i0 = ((k << 1U) & wire_parity_inv) | (wire_parity & k); + const size_t i1 = i0 | rev_wire_shift; + const std::complex v0 = arr[i0]; + const std::complex v1 = arr[i1]; + arr[i0] = std::complex{c * real(v0) - s * real(v1), + c * imag(v0) - s * imag(v1)}; + arr[i1] = std::complex{s * real(v0) + c * real(v1), + s * imag(v0) + c * imag(v1)}; + } } template @@ -571,7 +610,6 @@ class GateImplementationsLM : public PauliGenerator { const size_t parity_middle = fillLeadingOnes(rev_wire_min + 1) & fillTrailingOnes(rev_wire_max); - /* This is faster than iterate over all indices */ for (size_t k = 0; k < Util::exp2(num_qubits - 2); k++) { const size_t i00 = ((k << 2U) & parity_high) | ((k << 1U) & parity_middle) | (k & parity_low); @@ -580,6 +618,43 @@ class GateImplementationsLM : public PauliGenerator { } } + template + static void applyCRot(std::complex *arr, size_t num_qubits, + const std::vector &wires, bool inverse, + ParamT phi, ParamT theta, ParamT omega) { + assert(wires.size() == 2); + + const size_t rev_wire0 = num_qubits - wires[1] - 1; + const size_t rev_wire1 = num_qubits - wires[0] - 1; // Control qubit + + const size_t rev_wire0_shift = static_cast(1U) << rev_wire0; + const size_t rev_wire1_shift = static_cast(1U) << rev_wire1; + + const size_t rev_wire_min = std::min(rev_wire0, rev_wire1); + const size_t rev_wire_max = std::max(rev_wire0, rev_wire1); + + const size_t parity_low = fillTrailingOnes(rev_wire_min); + const size_t parity_high = fillLeadingOnes(rev_wire_max + 1); + const size_t parity_middle = + fillLeadingOnes(rev_wire_min + 1) & fillTrailingOnes(rev_wire_max); + + const auto rotMat = + (inverse) ? 
Gates::getRot(-omega, -theta, -phi) + : Gates::getRot(phi, theta, omega); + + for (size_t k = 0; k < Util::exp2(num_qubits - 2); k++) { + const size_t i00 = ((k << 2U) & parity_high) | + ((k << 1U) & parity_middle) | (k & parity_low); + const size_t i10 = i00 | rev_wire1_shift; + const size_t i11 = i00 | rev_wire0_shift | rev_wire1_shift; + + const std::complex v0 = arr[i10]; + const std::complex v1 = arr[i11]; + arr[i10] = rotMat[0] * v0 + rotMat[1] * v1; + arr[i11] = rotMat[2] * v0 + rotMat[3] * v1; + } + } + template static void applySWAP(std::complex *arr, size_t num_qubits, const std::vector &wires, @@ -863,8 +938,10 @@ class GateImplementationsLM : public PauliGenerator { const std::complex v10 = arr[i10]; const std::complex v11 = arr[i11]; - arr[i10] = c * v10 + -s * v11; - arr[i11] = s * v10 + c * v11; + arr[i10] = std::complex{c * real(v10) - s * real(v11), + c * imag(v10) - s * imag(v11)}; + arr[i11] = std::complex{s * real(v10) + c * real(v11), + s * imag(v10) + c * imag(v11)}; } } @@ -1025,6 +1102,7 @@ class GateImplementationsLM : public PauliGenerator { // NOLINTNEXTLINE(readability-magic-numbers) return -static_cast(0.5); } + template [[nodiscard]] static auto applyGeneratorIsingZZ(std::complex *arr, size_t num_qubits, @@ -1176,6 +1254,42 @@ class GateImplementationsLM : public PauliGenerator { return -static_cast(0.5); } + template + [[nodiscard]] static auto applyGeneratorControlledPhaseShift( + std::complex *arr, size_t num_qubits, + const std::vector &wires, [[maybe_unused]] bool adj) + -> PrecisionT { + using ComplexPrecisionT = std::complex; + assert(wires.size() == 2); + + const size_t rev_wire0 = num_qubits - wires[1] - 1; + const size_t rev_wire1 = num_qubits - wires[0] - 1; // Control qubit + + const size_t rev_wire0_shift = static_cast(1U) << rev_wire0; + const size_t rev_wire1_shift = static_cast(1U) << rev_wire1; + + const size_t rev_wire_min = std::min(rev_wire0, rev_wire1); + const size_t rev_wire_max = std::max(rev_wire0, rev_wire1); 
+ + const size_t parity_low = fillTrailingOnes(rev_wire_min); + const size_t parity_high = fillLeadingOnes(rev_wire_max + 1); + const size_t parity_middle = + fillLeadingOnes(rev_wire_min + 1) & fillTrailingOnes(rev_wire_max); + + for (size_t k = 0; k < Util::exp2(num_qubits - 2); k++) { + const size_t i00 = ((k << 2U) & parity_high) | + ((k << 1U) & parity_middle) | (k & parity_low); + const size_t i01 = i00 | rev_wire0_shift; + const size_t i10 = i00 | rev_wire1_shift; + + arr[i00] = ComplexPrecisionT{}; + arr[i01] = ComplexPrecisionT{}; + arr[i10] = ComplexPrecisionT{}; + } + // NOLINTNEXTLINE(readability-magic-numbers) + return static_cast(1); + } + template [[nodiscard]] static auto applyGeneratorMultiRZ(std::complex *arr, size_t num_qubits, diff --git a/pennylane_lightning/src/gates/GateImplementationsPI.hpp b/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsPI.hpp similarity index 91% rename from pennylane_lightning/src/gates/GateImplementationsPI.hpp rename to pennylane_lightning/src/gates/cpu_kernels/GateImplementationsPI.hpp index 69a5826efc..82a0edf924 100644 --- a/pennylane_lightning/src/gates/GateImplementationsPI.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsPI.hpp @@ -24,13 +24,14 @@ #endif /// @endcond +#include "PauliGenerator.hpp" + #include "BitUtil.hpp" #include "GateOperation.hpp" #include "GateUtil.hpp" #include "Gates.hpp" #include "KernelType.hpp" #include "LinearAlgebra.hpp" -#include "PauliGenerator.hpp" #include #include @@ -48,7 +49,11 @@ class GateImplementationsPI : public PauliGenerator { public: constexpr static KernelType kernel_id = KernelType::PI; constexpr static std::string_view name = "PI"; - constexpr static uint32_t data_alignment_in_bytes = 1; + template + constexpr static size_t required_alignment = + std::alignment_of_v; + template + constexpr static uint32_t packed_bytes = std::alignment_of_v; constexpr static std::array implemented_gates = { GateOperation::PauliX, 
GateOperation::PauliY, @@ -66,10 +71,17 @@ class GateImplementationsPI : public PauliGenerator { GateOperation::CSWAP, GateOperation::MultiRZ, GateOperation::Matrix}; constexpr static std::array implemented_generators = { - GeneratorOperation::RX, GeneratorOperation::RY, - GeneratorOperation::RZ, GeneratorOperation::PhaseShift, - GeneratorOperation::CRX, GeneratorOperation::CRY, - GeneratorOperation::CRZ, GeneratorOperation::ControlledPhaseShift}; + GeneratorOperation::RX, + GeneratorOperation::RY, + GeneratorOperation::RZ, + GeneratorOperation::PhaseShift, + GeneratorOperation::IsingXX, + GeneratorOperation::IsingYY, + GeneratorOperation::IsingZZ, + GeneratorOperation::CRX, + GeneratorOperation::CRY, + GeneratorOperation::CRZ, + GeneratorOperation::ControlledPhaseShift}; /** * @brief Apply a given matrix directly to the statevector. @@ -327,8 +339,7 @@ class GateImplementationsPI : public PauliGenerator { assert(wires.size() == 1); const auto [indices, externalIndices] = GateIndices(wires, num_qubits); - const std::vector> rot = - Gates::getRot(phi, theta, omega); + const auto rot = Gates::getRot(phi, theta, omega); const std::complex t1 = (inverse) ? 
std::conj(rot[0]) : rot[0]; @@ -687,6 +698,63 @@ class GateImplementationsPI : public PauliGenerator { return -static_cast(0.5); } + template + [[nodiscard]] static auto + applyGeneratorIsingXX(std::complex *arr, size_t num_qubits, + const std::vector &wires, + [[maybe_unused]] bool adj) -> PrecisionT { + assert(wires.size() == 2); + const auto [indices, externalIndices] = GateIndices(wires, num_qubits); + + for (const size_t &externalIndex : externalIndices) { + std::complex *shiftedState = arr + externalIndex; + std::swap(shiftedState[indices[0]], shiftedState[indices[3]]); + std::swap(shiftedState[indices[2]], shiftedState[indices[1]]); + } + + // NOLINTNEXTLINE(readability-magic-numbers) + return -static_cast(0.5); + } + + template + [[nodiscard]] static auto + applyGeneratorIsingYY(std::complex *arr, size_t num_qubits, + const std::vector &wires, + [[maybe_unused]] bool adj) -> PrecisionT { + assert(wires.size() == 2); + const auto [indices, externalIndices] = GateIndices(wires, num_qubits); + + for (const size_t &externalIndex : externalIndices) { + std::complex *shiftedState = arr + externalIndex; + const auto v00 = shiftedState[indices[0]]; + shiftedState[indices[0]] = -shiftedState[indices[3]]; + shiftedState[indices[3]] = -v00; + std::swap(shiftedState[indices[2]], shiftedState[indices[1]]); + } + + // NOLINTNEXTLINE(readability-magic-numbers) + return -static_cast(0.5); + } + + template + [[nodiscard]] static auto + applyGeneratorIsingZZ(std::complex *arr, size_t num_qubits, + const std::vector &wires, + [[maybe_unused]] bool adj) -> PrecisionT { + assert(wires.size() == 2); + const auto [indices, externalIndices] = GateIndices(wires, num_qubits); + + for (const size_t &externalIndex : externalIndices) { + std::complex *shiftedState = arr + externalIndex; + + shiftedState[indices[1]] *= -1; + shiftedState[indices[2]] *= -1; + } + + // NOLINTNEXTLINE(readability-magic-numbers) + return -static_cast(0.5); + } + template [[nodiscard]] static auto 
applyGeneratorCRY(std::complex *arr, size_t num_qubits, diff --git a/pennylane_lightning/src/gates/PauliGenerator.hpp b/pennylane_lightning/src/gates/cpu_kernels/PauliGenerator.hpp similarity index 100% rename from pennylane_lightning/src/gates/PauliGenerator.hpp rename to pennylane_lightning/src/gates/cpu_kernels/PauliGenerator.hpp diff --git a/pennylane_lightning/src/simulator/DefaultKernelsForStateVector.hpp b/pennylane_lightning/src/simulator/DefaultKernelsForStateVector.hpp new file mode 100644 index 0000000000..72613bc386 --- /dev/null +++ b/pennylane_lightning/src/simulator/DefaultKernelsForStateVector.hpp @@ -0,0 +1,400 @@ +// Copyright 2022 Xanadu Quantum Technologies Inc. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+/** + * @file + */ +#include "DispatchKeys.hpp" +#include "GateOperation.hpp" +#include "KernelType.hpp" + +#include +#include + +namespace Pennylane { + +inline auto larger_than(size_t size) { + return [=](size_t num_qubits) { return num_qubits > size; }; +} +inline auto larger_than_equal_to(size_t size) { + return [=](size_t num_qubits) { return num_qubits >= size; }; +} +inline auto less_than(size_t size) { + return [=](size_t num_qubits) { return num_qubits < size; }; +} +inline auto less_than_equal_to(size_t size) { + return [=](size_t num_qubits) { return num_qubits <= size; }; +} +inline auto in_between_closed(size_t l1, size_t l2) { + return [=](size_t num_qubits) { + return (l1 <= num_qubits) && (num_qubits <= l2); + }; +} + +class DefaultKernelsForStateVector { + private: + const static inline std::unordered_map> + allowed_kernels{ + {CPUMemoryModel::Unaligned, + {Gates::KernelType::LM, Gates::KernelType::PI}}, + {CPUMemoryModel::Aligned256, + {Gates::KernelType::LM, Gates::KernelType::PI}}, + {CPUMemoryModel::Aligned512, + {Gates::KernelType::LM, Gates::KernelType::PI}}, + }; + + std::unordered_map< + Gates::GateOperation, + std::vector, + Gates::KernelType>>> + gate_kernel_map_; + + std::unordered_map< + Gates::GeneratorOperation, + std::vector, + Gates::KernelType>>> + generator_kernel_map_; + + void registerDefaultGates() { + using Gates::GateOperation; + auto &instance = *this; + auto all_qubit_numbers = []([[maybe_unused]] size_t num_qubits) { + return true; + }; + /* Single-qubit gates */ + instance.assignKernelForGate(GateOperation::PauliX, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + instance.assignKernelForGate(GateOperation::PauliY, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + instance.assignKernelForGate(GateOperation::PauliZ, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + instance.assignKernelForGate(GateOperation::Hadamard, 
all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + instance.assignKernelForGate(GateOperation::S, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + instance.assignKernelForGate(GateOperation::T, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + instance.assignKernelForGate(GateOperation::PhaseShift, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + instance.assignKernelForGate(GateOperation::RX, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + instance.assignKernelForGate(GateOperation::RY, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + instance.assignKernelForGate(GateOperation::RZ, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + instance.assignKernelForGate(GateOperation::Rot, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + /* Two-qubit gates */ + instance.assignKernelForGate(GateOperation::CNOT, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + instance.assignKernelForGate(GateOperation::CY, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + instance.assignKernelForGate(GateOperation::CZ, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + instance.assignKernelForGate(GateOperation::ControlledPhaseShift, + all_threading, all_memory_model, + all_qubit_numbers, Gates::KernelType::LM); + instance.assignKernelForGate(GateOperation::SWAP, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + + instance.assignKernelForGate(GateOperation::IsingXX, all_threading, + all_memory_model, less_than(12), + Gates::KernelType::LM); + instance.assignKernelForGate( + GateOperation::IsingXX, all_threading, all_memory_model, + in_between_closed(12, 20), Gates::KernelType::PI); + 
instance.assignKernelForGate(GateOperation::IsingXX, all_threading, + all_memory_model, larger_than(20), + Gates::KernelType::LM); + + instance.assignKernelForGate(GateOperation::IsingYY, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + instance.assignKernelForGate(GateOperation::IsingZZ, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + instance.assignKernelForGate(GateOperation::CRX, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + instance.assignKernelForGate(GateOperation::CRY, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + instance.assignKernelForGate(GateOperation::CRZ, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + instance.assignKernelForGate(GateOperation::CRot, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + instance.assignKernelForGate(GateOperation::Toffoli, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::PI); + instance.assignKernelForGate(GateOperation::CSWAP, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::PI); + instance.assignKernelForGate(GateOperation::MultiRZ, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + } + + void registerDefaultGenerators() { + using Gates::GeneratorOperation; + using Gates::KernelType; + auto &instance = *this; + auto all_qubit_numbers = []([[maybe_unused]] size_t num_qubits) { + return true; + }; + + instance.assignKernelForGenerator(GeneratorOperation::PhaseShift, + all_threading, all_memory_model, + all_qubit_numbers, KernelType::LM); + instance.assignKernelForGenerator(GeneratorOperation::RX, all_threading, + all_memory_model, all_qubit_numbers, + KernelType::LM); + instance.assignKernelForGenerator(GeneratorOperation::RY, all_threading, + all_memory_model, all_qubit_numbers, + KernelType::LM); + 
instance.assignKernelForGenerator(GeneratorOperation::RZ, all_threading, + all_memory_model, all_qubit_numbers, + KernelType::LM); + instance.assignKernelForGenerator(GeneratorOperation::IsingXX, + all_threading, all_memory_model, + all_qubit_numbers, KernelType::LM); + instance.assignKernelForGenerator(GeneratorOperation::IsingYY, + all_threading, all_memory_model, + all_qubit_numbers, KernelType::LM); + instance.assignKernelForGenerator(GeneratorOperation::IsingZZ, + all_threading, all_memory_model, + all_qubit_numbers, KernelType::LM); + instance.assignKernelForGenerator(GeneratorOperation::CRX, + all_threading, all_memory_model, + all_qubit_numbers, KernelType::LM); + instance.assignKernelForGenerator(GeneratorOperation::CRY, + all_threading, all_memory_model, + all_qubit_numbers, KernelType::LM); + instance.assignKernelForGenerator(GeneratorOperation::CRZ, + all_threading, all_memory_model, + all_qubit_numbers, KernelType::LM); + instance.assignKernelForGenerator( + GeneratorOperation::ControlledPhaseShift, all_threading, + all_memory_model, all_qubit_numbers, KernelType::LM); + instance.assignKernelForGenerator(GeneratorOperation::MultiRZ, + all_threading, all_memory_model, + all_qubit_numbers, KernelType::LM); + } + + DefaultKernelsForStateVector() { + registerDefaultGates(); + registerDefaultGenerators(); + } + + public: + struct AllThreading {}; + + struct AllMemoryModel {}; + + constexpr static AllThreading all_threading{}; + constexpr static AllMemoryModel all_memory_model{}; + + static auto getInstance() -> DefaultKernelsForStateVector & { + static DefaultKernelsForStateVector instance; + + return instance; + } + + void + assignKernelForGate(Gates::GateOperation gate_op, Threading threading, + CPUMemoryModel memory_model, + const std::function &num_qubits_criterion, + Gates::KernelType kernel) { + if (std::find(allowed_kernels.at(memory_model).cbegin(), + allowed_kernels.at(memory_model).cend(), + kernel) == allowed_kernels.at(memory_model).cend()) { + 
throw std::invalid_argument("The given kernel is now allowed for " + "the given memory model."); + } + gate_kernel_map_[gate_op].emplace_back( + toDispatchKey(threading, memory_model), num_qubits_criterion, + kernel); + } + + void + assignKernelForGate(Gates::GateOperation gate_op, + [[maybe_unused]] AllThreading dummy, + CPUMemoryModel memory_model, + const std::function &num_qubits_criterion, + Gates::KernelType kernel) { + Util::for_each_enum([=](Threading threading) { + assignKernelForGate(gate_op, threading, memory_model, + num_qubits_criterion, kernel); + }); + } + + void + assignKernelForGate(Gates::GateOperation gate_op, Threading threading, + [[maybe_unused]] AllMemoryModel dummy, + const std::function &num_qubits_criterion, + Gates::KernelType kernel) { + Util::for_each_enum([=](CPUMemoryModel memory_model) { + assignKernelForGate(gate_op, threading, memory_model, + num_qubits_criterion, kernel); + }); + } + + void + assignKernelForGate(Gates::GateOperation gate_op, + [[maybe_unused]] AllThreading dummy1, + [[maybe_unused]] AllMemoryModel dummy2, + const std::function &num_qubits_criterion, + Gates::KernelType kernel) { + Util::for_each_enum( + [=](Threading threading, CPUMemoryModel memory_model) { + assignKernelForGate(gate_op, threading, memory_model, + num_qubits_criterion, kernel); + }); + } + + void assignKernelForGenerator( + Gates::GeneratorOperation gntr_op, Threading threading, + CPUMemoryModel memory_model, + const std::function &num_qubits_criterion, + Gates::KernelType kernel) { + if (std::find(allowed_kernels.at(memory_model).cbegin(), + allowed_kernels.at(memory_model).cend(), + kernel) == allowed_kernels.at(memory_model).cend()) { + throw std::invalid_argument("The given kernel is now allowed for " + "the given memory model."); + } + generator_kernel_map_[gntr_op].emplace_back( + toDispatchKey(threading, memory_model), num_qubits_criterion, + kernel); + } + + void assignKernelForGenerator( + Gates::GeneratorOperation gntr_op, 
[[maybe_unused]] AllThreading dummy, + CPUMemoryModel memory_model, + const std::function &num_qubits_criterion, + Gates::KernelType kernel) { + Util::for_each_enum([=](Threading threading) { + assignKernelForGenerator(gntr_op, threading, memory_model, + num_qubits_criterion, kernel); + }); + } + + void assignKernelForGenerator( + Gates::GeneratorOperation gntr_op, Threading threading, + [[maybe_unused]] AllMemoryModel dummy, + const std::function &num_qubits_criterion, + Gates::KernelType kernel) { + Util::for_each_enum([=](CPUMemoryModel memory_model) { + assignKernelForGenerator(gntr_op, threading, memory_model, + num_qubits_criterion, kernel); + }); + } + + void assignKernelForGenerator( + Gates::GeneratorOperation gntr_op, [[maybe_unused]] AllThreading dummy1, + [[maybe_unused]] AllMemoryModel dummy2, + const std::function &num_qubits_criterion, + Gates::KernelType kernel) { + Util::for_each_enum( + [=](Threading threading, CPUMemoryModel memory_model) { + assignKernelForGenerator(gntr_op, threading, memory_model, + num_qubits_criterion, kernel); + }); + } + + /** + * @brief Create default kernels for all generators + * @param num_qubits Number of qubits + * @param threading Threading context + * @param memory_model Memory model of the underlying data + */ + auto getGeneratorKernelMap(size_t num_qubits, Threading threading, + CPUMemoryModel memory_model) const + -> std::unordered_map { + uint32_t dispatch_key = toDispatchKey(threading, memory_model); + + std::unordered_map + kernel_for_generators; + + for (auto generator = Gates::GeneratorOperation::BEGIN; + generator != Gates::GeneratorOperation::END; + generator = static_cast( + static_cast(generator) + 1)) { + + const auto iter = + std::find_if(generator_kernel_map_.at(generator).cbegin(), + generator_kernel_map_.at(generator).cend(), + [dispatch_key = dispatch_key, + num_qubits = num_qubits](const auto &t) { + return (std::get<0>(t) == dispatch_key && + std::get<1>(t)(num_qubits)); + }); + if (iter == 
generator_kernel_map_.at(generator).cend()) { + throw std::range_error("Cannot find registered kernel for a " + "dispatch key and number of qubits."); + } + kernel_for_generators.emplace(generator, std::get<2>(*iter)); + } + return kernel_for_generators; + } + + auto getGateKernelMap(size_t num_qubits, Threading threading, + CPUMemoryModel memory_model) const + -> std::unordered_map { + uint32_t dispatch_key = toDispatchKey(threading, memory_model); + + std::unordered_map + kernel_for_gates; + + for (auto gate = Gates::GateOperation::BEGIN; + gate != Gates::GateOperation::END; + gate = static_cast( + static_cast(gate) + 1)) { + + if (gate == Gates::GateOperation::Matrix) { + continue; + } + + const auto iter = std::find_if( + gate_kernel_map_.at(gate).cbegin(), + gate_kernel_map_.at(gate).cend(), [=](const auto &t) { + return (std::get<0>(t) == dispatch_key && + std::get<1>(t)(num_qubits)); + }); + if (iter == gate_kernel_map_.at(gate).cend()) { + throw std::range_error("Cannot find registered kernel for a " + "dispatch key and number of qubits."); + } + kernel_for_gates.emplace(gate, std::get<2>(*iter)); + } + return kernel_for_gates; + } +}; +} // namespace Pennylane diff --git a/pennylane_lightning/src/simulator/DispatchKeys.hpp b/pennylane_lightning/src/simulator/DispatchKeys.hpp new file mode 100644 index 0000000000..08265c9c59 --- /dev/null +++ b/pennylane_lightning/src/simulator/DispatchKeys.hpp @@ -0,0 +1,87 @@ +// Copyright 2022 Xanadu Quantum Technologies Inc. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +/** + * @file + * Define keys to select kernels + */ +#pragma once + +#include "Macros.hpp" + +#include + +#ifdef PL_USE_OMP +#include +#endif + +namespace Pennylane { +enum class Threading : uint8_t { + SingleThread, + MultiThread, + END, + BEGIN = SingleThread, +}; + +enum class CPUMemoryModel : uint8_t { + Unaligned, + Aligned256, + Aligned512, + END, + BEGIN = Unaligned, +}; + +constexpr uint32_t toDispatchKey(Threading threading, + CPUMemoryModel memory_model) { + /* Threading is in higher priority */ + return (static_cast(threading) << 8U) | + static_cast(memory_model); +} + +inline auto getMemoryModel(const void *ptr) -> CPUMemoryModel { + if ((reinterpret_cast(ptr) % 64) == 0) { + return CPUMemoryModel::Aligned512; + } + + if ((reinterpret_cast(ptr) % 32) == 0) { + return CPUMemoryModel::Aligned256; + } + + return CPUMemoryModel::Unaligned; +} + +/** + * @brief Choose the best threading based on the current context. + */ +inline auto bestThreading() -> Threading { +#ifdef PL_USE_OMP + if (omp_in_parallel() != 0) { + // We are already inside of the openmp parallel region (e.g. + // inside adjoint diff). 
+ return Threading::SingleThread; + } + return Threading::MultiThread; +#endif + return Threading::SingleThread; +} + +constexpr inline auto bestCPUMemoryModel() -> CPUMemoryModel { + if constexpr (use_avx512f) { + return CPUMemoryModel::Aligned512; + } else if (use_avx2) { + return CPUMemoryModel::Aligned256; + } + return CPUMemoryModel::Unaligned; +} + +} // namespace Pennylane diff --git a/pennylane_lightning/src/simulator/DynamicDispatcher.cpp b/pennylane_lightning/src/simulator/DynamicDispatcher.cpp index 315b7a102e..22187d4fcf 100644 --- a/pennylane_lightning/src/simulator/DynamicDispatcher.cpp +++ b/pennylane_lightning/src/simulator/DynamicDispatcher.cpp @@ -143,9 +143,7 @@ void registerAllImplementedGateOps() { auto registerGateToDispatcher = [&dispatcher]( const auto &gate_op_func_pair) { const auto &[gate_op, func] = gate_op_func_pair; - std::string op_name = - std::string(lookup(Gates::Constant::gate_names, gate_op)); - dispatcher.registerGateOperation(op_name, GateImplementation::kernel_id, + dispatcher.registerGateOperation(gate_op, GateImplementation::kernel_id, func); return gate_op; }; @@ -169,10 +167,8 @@ void registerAllImplementedGeneratorOps() { auto registerGeneratorToDispatcher = [&dispatcher](const auto &gntr_op_func_pair) { const auto &[gntr_op, func] = gntr_op_func_pair; - std::string op_name = - std::string(lookup(Gates::Constant::generator_names, gntr_op)); dispatcher.registerGeneratorOperation( - op_name, GateImplementation::kernel_id, func); + gntr_op, GateImplementation::kernel_id, func); return gntr_op; }; diff --git a/pennylane_lightning/src/simulator/DynamicDispatcher.hpp b/pennylane_lightning/src/simulator/DynamicDispatcher.hpp index 83536f9076..284e221002 100644 --- a/pennylane_lightning/src/simulator/DynamicDispatcher.hpp +++ b/pennylane_lightning/src/simulator/DynamicDispatcher.hpp @@ -36,10 +36,9 @@ /// @cond DEV namespace Pennylane::Internal { struct PairHash { - size_t - operator()(const std::pair &p) const { - return 
std::hash()(p.first) ^ - std::hash()(static_cast(p.second)); + template + size_t operator()(const std::pair &p) const { + return std::hash()(p.first) ^ std::hash()(p.second); } }; /** @@ -60,12 +59,12 @@ namespace Pennylane { template struct registerBeforeMain; template <> struct registerBeforeMain { - static inline int dummy = + static inline const int dummy = Internal::registerAllAvailableKernels(); }; template <> struct registerBeforeMain { - static inline int dummy = + static inline const int dummy = Internal::registerAllAvailableKernels(); }; @@ -89,28 +88,19 @@ template class DynamicDispatcher { bool /*adjoint*/); private: - std::unordered_map gate_wires_; + std::unordered_map str_to_gates_; + std::unordered_map str_to_gntrs_; - std::unordered_map gate_kernel_map_; - std::unordered_map generator_kernel_map_; - - std::unordered_map, GateFunc, - Internal::PairHash> + std::unordered_map, + GateFunc, Internal::PairHash> gates_; - std::unordered_map, GeneratorFunc, - Internal::PairHash> + std::unordered_map, + GeneratorFunc, Internal::PairHash> generators_; - std::string removeGeneratorPrefix(const std::string &op_name) { - constexpr std::string_view prefix = "Generator"; - // TODO: change to string::starts_with in C++20 - if (op_name.rfind(prefix) != 0) { - return op_name; - } - return op_name.substr(prefix.size()); - } - std::string_view removeGeneratorPrefix(std::string_view op_name) { + constexpr static auto removeGeneratorPrefix(std::string_view op_name) + -> std::string_view { constexpr std::string_view prefix = "Generator"; // TODO: change to string::starts_with in C++20 if (op_name.rfind(prefix) != 0) { @@ -121,38 +111,13 @@ template class DynamicDispatcher { DynamicDispatcher() { using Gates::KernelType; - for (const auto &[gate_op, n_wires] : Gates::Constant::gate_wires) { - gate_wires_.emplace( - Util::lookup(Gates::Constant::gate_names, gate_op), n_wires); - } for (const auto &[gate_op, gate_name] : Gates::Constant::gate_names) { - KernelType kernel = 
Util::lookup( - Gates::Constant::default_kernel_for_gates, gate_op); - const auto implemented_gates = implementedGatesForKernel(kernel); - if (std::find(std::cbegin(implemented_gates), - std::cend(implemented_gates), - gate_op) == std::cend(implemented_gates)) { - PL_ABORT("Default kernel for " + std::string(gate_name) + - " does not implement the gate."); - } - gate_kernel_map_.emplace(gate_name, kernel); + str_to_gates_.emplace(gate_name, gate_op); } - for (const auto &[gntr_op, gntr_name] : Gates::Constant::generator_names) { - KernelType kernel = Util::lookup( - Gates::Constant::default_kernel_for_generators, gntr_op); - const auto implemented_generators = - implementedGeneratorsForKernel(kernel); - if (std::find(std::cbegin(implemented_generators), - std::cend(implemented_generators), - gntr_op) == std::cend(implemented_generators)) { - PL_ABORT("Default kernel for " + std::string(gntr_name) + - " does not implement the generator."); - } - generator_kernel_map_.emplace(removeGeneratorPrefix(gntr_name), - kernel); + str_to_gntrs_.emplace(removeGeneratorPrefix(gntr_name), gntr_op); } } @@ -162,15 +127,24 @@ template class DynamicDispatcher { return singleton; } + auto strToGateOp(const std::string &gate_name) const + -> Gates::GateOperation { + return str_to_gates_.at(gate_name); + } + auto strToGeneratorOp(const std::string &gntr_name) const + -> Gates::GeneratorOperation { + return str_to_gntrs_.at(gntr_name); + } + /** * @brief Register a new gate operation for the operation. 
Can pass a custom * kernel */ template - void registerGateOperation(const std::string &op_name, + void registerGateOperation(Gates::GateOperation gate_op, Gates::KernelType kernel, FunctionType &&func) { // TODO: Add mutex when we go to multithreading - gates_.emplace(std::make_pair(op_name, kernel), + gates_.emplace(std::make_pair(gate_op, kernel), std::forward(func)); } @@ -179,13 +153,12 @@ template class DynamicDispatcher { * kernel */ template - void registerGeneratorOperation(const std::string &op_name, + void registerGeneratorOperation(Gates::GeneratorOperation gntr_op, Gates::KernelType kernel, FunctionType &&func) { // TODO: Add mutex when we go to multithreading - generators_.emplace( - std::make_pair(removeGeneratorPrefix(op_name), kernel), - std::forward(func)); + generators_.emplace(std::make_pair(gntr_op, kernel), + std::forward(func)); } /** @@ -203,26 +176,19 @@ template class DynamicDispatcher { size_t num_qubits, const std::string &op_name, const std::vector &wires, bool inverse, const std::vector ¶ms = {}) const { - const auto iter = gates_.find(std::make_pair(op_name, kernel)); + const auto iter = + gates_.find(std::make_pair(strToGateOp(op_name), kernel)); if (iter == gates_.cend()) { throw std::invalid_argument( "Cannot find a gate with a given name \"" + op_name + "\"."); } - const auto gate_wire_iter = gate_wires_.find(op_name); - if ((gate_wire_iter != gate_wires_.end()) && - (gate_wire_iter->second != wires.size())) { - throw std::invalid_argument( - std::string("The supplied gate requires ") + - std::to_string(gate_wire_iter->second) + " wires, but " + - std::to_string(wires.size()) + " were supplied."); - // TODO: change to std::format in C++20 - } (iter->second)(data, num_qubits, wires, inverse, params); } /** - * @brief Apply a single gate to the state-vector using a registered kernel + * @brief Apply a single gate to the state-vector using the given kernel. * + * @param kernel Kernel to run the gate operation. 
* @param data Pointer to data. * @param num_qubits Number of qubits. * @param op_name Gate operation name. @@ -230,17 +196,19 @@ template class DynamicDispatcher { * @param inverse Indicates whether to use inverse of gate. * @param params Optional parameter list for parametric gates. */ - inline void - applyOperation(CFP_t *data, size_t num_qubits, const std::string &op_name, - const std::vector &wires, bool inverse, - const std::vector ¶ms = {}) const { - const auto kernel_iter = gate_kernel_map_.find(op_name); - if (kernel_iter == gate_kernel_map_.end()) { - PL_ABORT("Kernel for gate " + op_name + " is not registered."); + void applyOperation(Gates::KernelType kernel, CFP_t *data, + size_t num_qubits, Gates::GateOperation gate_op, + const std::vector &wires, bool inverse, + const std::vector ¶ms = {}) const { + const auto iter = gates_.find(std::make_pair(gate_op, kernel)); + if (iter == gates_.cend()) { + throw std::invalid_argument( + std::string("Cannot find a gate with a given name \"") + + std::string( + Util::lookup(Gates::Constant::gate_names, gate_op)) + + "\"."); } - - applyOperation(kernel_iter->second, data, num_qubits, op_name, wires, - inverse, params); + (iter->second)(data, num_qubits, wires, inverse, params); } /** @@ -312,34 +280,13 @@ template class DynamicDispatcher { size_t num_qubits, const std::string &op_name, const std::vector &wires, bool adj) const -> PrecisionT { - const auto iter = generators_.find(std::make_pair(op_name, kernel)); + const auto iter = + generators_.find(std::make_pair(strToGeneratorOp(op_name), kernel)); if (iter == generators_.cend()) { throw std::invalid_argument( "Cannot find a gate with a given name \"" + op_name + "\"."); } return (iter->second)(data, num_qubits, wires, adj); } - - /** - * @brief Apply a single gate to the state-vector using a registered kernel - * - * @param data Pointer to data. - * @param num_qubits Number of qubits. - * @param op_name Gate operation name. 
- * @param wires Wires to apply gate to. - * @param adj Indicates whether to use adjoint of gate. - */ - inline auto applyGenerator(CFP_t *data, size_t num_qubits, - const std::string &op_name, - const std::vector &wires, bool adj) const - -> PrecisionT { - const auto kernel_iter = generator_kernel_map_.find(op_name); - if (kernel_iter == generator_kernel_map_.end()) { - PL_ABORT("Kernel for gate " + op_name + " is not registered."); - } - - return applyGenerator(kernel_iter->second, data, num_qubits, op_name, - wires, adj); - } }; } // namespace Pennylane diff --git a/pennylane_lightning/src/simulator/Measures.hpp b/pennylane_lightning/src/simulator/Measures.hpp index 26208b6ba1..f2f1cc010a 100644 --- a/pennylane_lightning/src/simulator/Measures.hpp +++ b/pennylane_lightning/src/simulator/Measures.hpp @@ -26,7 +26,7 @@ #include #include "LinearAlgebra.hpp" -#include "StateVectorManaged.hpp" +#include "StateVectorCPU.hpp" #include "StateVectorRaw.hpp" namespace Pennylane { @@ -123,7 +123,7 @@ class Measures { const std::vector &wires) { // Copying the original state vector, for the application of the // observable operator. - StateVectorManaged operator_statevector(original_statevector); + StateVectorCPU operator_statevector(original_statevector); operator_statevector.applyMatrix(matrix, wires); @@ -143,7 +143,7 @@ class Measures { const std::vector &wires) { // Copying the original state vector, for the application of the // observable operator. - StateVectorManaged operator_statevector(original_statevector); + StateVectorCPU operator_statevector(original_statevector); operator_statevector.applyOperation(operation, wires); @@ -190,7 +190,7 @@ class Measures { fp_t var(const std::string &operation, const std::vector &wires) { // Copying the original state vector, for the application of the // observable operator. 
- StateVectorManaged operator_statevector(original_statevector); + StateVectorCPU operator_statevector(original_statevector); operator_statevector.applyOperation(operation, wires); @@ -216,7 +216,7 @@ class Measures { const std::vector &wires) { // Copying the original state vector, for the application of the // observable operator. - StateVectorManaged operator_statevector(original_statevector); + StateVectorCPU operator_statevector(original_statevector); operator_statevector.applyMatrix(matrix, wires); diff --git a/pennylane_lightning/src/simulator/StateVectorBase.hpp b/pennylane_lightning/src/simulator/StateVectorBase.hpp index dec223408c..e2b3ac32e8 100644 --- a/pennylane_lightning/src/simulator/StateVectorBase.hpp +++ b/pennylane_lightning/src/simulator/StateVectorBase.hpp @@ -141,6 +141,18 @@ template class StateVectorBase { return static_cast(this)->getData(); } + [[nodiscard]] inline auto + getKernelForGate(Gates::GateOperation gate_op) const -> Gates::KernelType { + return static_cast(this)->getKernelForGate(gate_op); + } + + [[nodiscard]] inline auto + getKernelForGenerator(Gates::GeneratorOperation gntr_op) const + -> Gates::KernelType { + return static_cast(this)->getKernelForGenerator( + gntr_op); + } + /** * @brief Compare two statevectors. 
* @@ -192,8 +204,10 @@ template class StateVectorBase { const std::vector &wires, bool inverse = false, const std::vector ¶ms = {}) { auto *arr = getData(); - DynamicDispatcher::getInstance().applyOperation( - arr, num_qubits_, opName, wires, inverse, params); + auto &dispatcher = DynamicDispatcher::getInstance(); + const auto gate_op = dispatcher.strToGateOp(opName); + dispatcher.applyOperation(getKernelForGate(gate_op), arr, num_qubits_, + gate_op, wires, inverse, params); } /** @@ -208,9 +222,15 @@ template class StateVectorBase { const std::vector> &wires, const std::vector &inverse, const std::vector> ¶ms) { - auto *arr = getData(); - DynamicDispatcher::getInstance().applyOperations( - arr, num_qubits_, ops, wires, inverse, params); + const size_t numOperations = ops.size(); + if (numOperations != wires.size()) { + throw std::invalid_argument( + "Invalid arguments: number of operations, wires, and " + "parameters must all be equal"); + } + for (size_t i = 0; i < numOperations; i++) { + applyOperation(ops[i], wires[i], inverse[i], params[i]); + } } /** @@ -223,9 +243,15 @@ template class StateVectorBase { void applyOperations(const std::vector &ops, const std::vector> &wires, const std::vector &inverse) { - auto *arr = getData(); - DynamicDispatcher::getInstance().applyOperations( - arr, num_qubits_, ops, wires, inverse); + const size_t numOperations = ops.size(); + if (numOperations != wires.size()) { + throw std::invalid_argument( + "Invalid arguments: number of operations, wires, and " + "parameters must all be equal"); + } + for (size_t i = 0; i < numOperations; i++) { + applyOperation(ops[i], wires[i], inverse[i], {}); + } } /** @@ -256,8 +282,10 @@ template class StateVectorBase { const std::vector &wires, bool adj = false) -> PrecisionT { auto *arr = getData(); - return DynamicDispatcher::getInstance().applyGenerator( - arr, num_qubits_, opName, wires, adj); + auto &dispatcher = DynamicDispatcher::getInstance(); + return dispatcher.applyGenerator( + 
getKernelForGenerator(dispatcher.strToGeneratorOp(opName)), arr, + num_qubits_, opName, wires, adj); } /** diff --git a/pennylane_lightning/src/simulator/StateVectorCPU.hpp b/pennylane_lightning/src/simulator/StateVectorCPU.hpp new file mode 100644 index 0000000000..a003adf1ba --- /dev/null +++ b/pennylane_lightning/src/simulator/StateVectorCPU.hpp @@ -0,0 +1,166 @@ +// Copyright 2021 Xanadu Quantum Technologies Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include "BitUtil.hpp" +#include "DefaultKernelsForStateVector.hpp" +#include "DispatchKeys.hpp" +#include "Gates.hpp" +#include "KernelType.hpp" +#include "Memory.hpp" +#include "StateVectorBase.hpp" +#include "Util.hpp" + +namespace Pennylane { + +/** + * @brief StateVector class where data resides in CPU memory. Memory ownership + * resides within class. + * + * We currently use std::unique_ptr to C-style array as we want to choose + * allocator in runtime. This is impossible with std::vector. 
+ * + * @tparam PrecisionT + */ +template +class StateVectorCPU + : public StateVectorBase> { + public: + using ComplexPrecisionT = std::complex; + + private: + using BaseType = StateVectorBase; + + Threading threading_; + CPUMemoryModel memory_model_; + + std::unordered_map + kernel_for_gates_; + std::unordered_map + kernel_for_generators_; + std::unique_ptr + data_; // NOLINT(modernize-avoid-c-arrays) + + void setKernels(size_t num_qubits, Threading threading, + CPUMemoryModel memory_model) { + auto &default_kernels = DefaultKernelsForStateVector::getInstance(); + kernel_for_gates_ = default_kernels.getGateKernelMap( + num_qubits, threading, memory_model); + kernel_for_generators_ = default_kernels.getGeneratorKernelMap( + num_qubits, threading, memory_model); + } + + public: + explicit StateVectorCPU(size_t num_qubits, + Threading threading = bestThreading(), + CPUMemoryModel memory_model = bestCPUMemoryModel()) + : BaseType(num_qubits), threading_{threading}, memory_model_{ + memory_model} { + + setKernels(num_qubits, threading, memory_model); + + size_t length = BaseType::getLength(); + data_ = std::unique_ptr{new (std::align_val_t{ + 64}) ComplexPrecisionT[length]}; // NOLINT(modernize-avoid-c-arrays) + std::fill(data_.get(), data_.get() + length, + ComplexPrecisionT{0.0, 0.0}); + data_[0] = {1, 0}; + } + + template + explicit StateVectorCPU( + const StateVectorBase &other, + Threading threading = bestThreading(), + CPUMemoryModel memory_model = bestCPUMemoryModel()) + : BaseType(other.getNumQubits()), threading_{threading}, + memory_model_{memory_model} { + + size_t length = BaseType::getLength(); + data_ = std::unique_ptr{new (std::align_val_t{ + 64}) ComplexPrecisionT[length]}; // NOLINT(modernize-avoid-c-arrays) + + std::copy(other.getData(), other.getData() + length, data_.get()); + + setKernels(BaseType::getNumQubits(), threading, memory_model); + } + + StateVectorCPU(const ComplexPrecisionT *other_data, size_t other_size, + Threading threading = 
bestThreading(), + CPUMemoryModel memory_model = bestCPUMemoryModel()) + : BaseType(Util::log2PerfectPower(other_size)), threading_{threading}, + memory_model_{memory_model} { + PL_ABORT_IF_NOT(Util::isPerfectPowerOf2(other_size), + "The size of provided data must be a power of 2."); + data_ = std::unique_ptr{ + new (std::align_val_t{64}) ComplexPrecisionT + [other_size]}; // NOLINT(modernize-avoid-c-arrays) + setKernels(BaseType::getNumQubits(), threading, memory_model); + + updateData(other_data); + } + + template + explicit StateVectorCPU( + const std::vector, Alloc> &rhs, + Threading threading = bestThreading(), + CPUMemoryModel memory_model = bestCPUMemoryModel()) + : StateVectorCPU(rhs.data(), rhs.size(), threading, + memory_model) // NOLINT(hicpp-member-init) + // this is false positive for delegating + // constructor from clang-tidy + {} + + StateVectorCPU(const StateVectorCPU &rhs) + : BaseType(rhs.getNumQubits()), threading_{rhs.threading_}, + memory_model_{rhs.memory_model_} { + setKernels(BaseType::getNumQubits(), threading_, memory_model_); + + size_t length = BaseType::getLength(); + data_ = std::unique_ptr{new (std::align_val_t{ + 64}) ComplexPrecisionT[length]}; // NOLINT(modernize-avoid-c-arrays) + std::copy(rhs.getData(), rhs.getData() + length, data_.get()); + } + + StateVectorCPU(StateVectorCPU &&) noexcept = default; + + StateVectorCPU &operator=(const StateVectorCPU &) = delete; + StateVectorCPU &operator=(StateVectorCPU &&) noexcept = default; + + ~StateVectorCPU() = default; + + [[nodiscard]] auto getData() -> ComplexPrecisionT * { return data_.get(); } + + [[nodiscard]] auto getData() const -> const ComplexPrecisionT * { + return data_.get(); + } + + [[nodiscard]] inline auto + getKernelForGate(Gates::GateOperation gate_op) const -> Gates::KernelType { + return kernel_for_gates_.at(gate_op); + } + + [[nodiscard]] inline auto + getKernelForGenerator(Gates::GeneratorOperation gntr_op) const + -> Gates::KernelType { + return 
kernel_for_generators_.at(gntr_op); + } + + /** + * @brief Update data of the class to new_data + * + * @param new_data std::vector contains data. + */ + void updateData(const ComplexPrecisionT *data) { + std::copy(data, data + BaseType::getLength(), data_.get()); + } +}; + +} // namespace Pennylane diff --git a/pennylane_lightning/src/simulator/StateVectorManaged.hpp b/pennylane_lightning/src/simulator/StateVectorManaged.hpp deleted file mode 100644 index a1317e75d5..0000000000 --- a/pennylane_lightning/src/simulator/StateVectorManaged.hpp +++ /dev/null @@ -1,104 +0,0 @@ -// Copyright 2021 Xanadu Quantum Technologies Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#pragma once - -#include "BitUtil.hpp" -#include "StateVectorBase.hpp" -#include "Util.hpp" - -namespace Pennylane { - -/** - * @brief Managed memory version of StateVector class. Memory ownership resides - * within class. - * - * This class is only internally used in C++ code. 
- * - * @tparam PrecisionT - */ -template -class StateVectorManaged - : public StateVectorBase> { - public: - using ComplexPrecisionT = std::complex; - - private: - using BaseType = StateVectorBase; - - std::vector data_; - - public: - StateVectorManaged() : StateVectorBase() {} - - explicit StateVectorManaged(size_t num_qubits) - : BaseType(num_qubits), - data_(static_cast(Util::exp2(num_qubits)), - ComplexPrecisionT{0, 0}) { - data_[0] = {1, 0}; - } - - template - explicit StateVectorManaged( - const StateVectorBase &other) - : BaseType(other.getNumQubits()), data_{other.getData(), - other.getData() + - other.getLength()} {} - - explicit StateVectorManaged( - const std::vector &other_data) - : BaseType(Util::log2(other_data.size())), data_{other_data} { - PL_ABORT_IF_NOT(Util::isPerfectPowerOf2(other_data.size()), - "The size of provided data must be a power of 2."); - } - - StateVectorManaged(const ComplexPrecisionT *other_data, size_t other_size) - : BaseType(Util::log2(other_size)), data_{other_data, - other_data + other_size} { - PL_ABORT_IF_NOT(Util::isPerfectPowerOf2(other_size), - "The size of provided data must be a power of 2."); - } - - StateVectorManaged(const StateVectorManaged &other) = default; - StateVectorManaged(StateVectorManaged &&other) noexcept = - default; - - ~StateVectorManaged() = default; - - auto operator=(const StateVectorManaged &other) - -> StateVectorManaged & = default; - auto operator=(StateVectorManaged &&other) noexcept - -> StateVectorManaged & = default; - - auto getDataVector() -> std::vector & { return data_; } - [[nodiscard]] auto getDataVector() const - -> const std::vector & { - return data_; - } - - [[nodiscard]] auto getData() -> ComplexPrecisionT * { return data_.data(); } - - [[nodiscard]] auto getData() const -> const ComplexPrecisionT * { - return data_.data(); - } - - /** - * @brief Update data of the class to new_data - * - * @param new_data std::vector contains data. 
- */ - void updateData(const std::vector &new_data) { - PL_ABORT_IF_NOT(data_.size() == new_data.size(), - "New data must be the same size as old data.") - std::copy(new_data.begin(), new_data.end(), data_.begin()); - } -}; - -} // namespace Pennylane diff --git a/pennylane_lightning/src/tests/.clang-tidy b/pennylane_lightning/src/tests/.clang-tidy index 3ed93f21bf..0a70c347b0 100644 --- a/pennylane_lightning/src/tests/.clang-tidy +++ b/pennylane_lightning/src/tests/.clang-tidy @@ -1,5 +1,5 @@ --- -Checks: 'clang-diagnostic-*,clang-analyzer-*,-*,-llvmlibc-*,modernize-*,-modernize-use-trailing-return-type,clang-analyzer-cplusplus*,openmp-*,performance-*,portability-*,readability-*,-readability-magic-numbers,-modernize-avoid-c-arrays' +Checks: 'clang-diagnostic-*,clang-analyzer-*,-*,-llvmlibc-*,modernize-*,-modernize-use-trailing-return-type,clang-analyzer-cplusplus*,openmp-*,performance-*,portability-*,readability-*,-readability-magic-numbers,hicpp-*,-hicpp-no-array-decay,bugprone-suspicious-*,llvm-namespace-comment,cppcoreguidelines-avoid-non-const-global-variables,cppcoreguidelines-slicing,cppcoreguidelines-special-member-functions' WarningsAsErrors: '*' HeaderFilterRegex: '.*' AnalyzeTemporaryDtors: false @@ -216,7 +216,7 @@ CheckOptions: - key: modernize-use-auto.RemoveStars value: 'false' - key: readability-magic-numbers.IgnorePowersOf2IntegerValues - value: 'false' + value: 'true' - key: portability-simd-intrinsics.Std value: '' - key: readability-redundant-member-init.IgnoreBaseInCopyConstructors diff --git a/pennylane_lightning/src/tests/CMakeLists.txt b/pennylane_lightning/src/tests/CMakeLists.txt index c507f938bf..fbe9b621ea 100644 --- a/pennylane_lightning/src/tests/CMakeLists.txt +++ b/pennylane_lightning/src/tests/CMakeLists.txt @@ -68,9 +68,12 @@ endif() add_executable(compile_time_tests compile_time_tests.cpp) target_link_libraries(compile_time_tests lightning_gates lightning_utils) -set(TEST_SOURCES Test_AdjDiff.cpp +set(TEST_SOURCES 
CreateAllWires.cpp + Test_AdjDiff.cpp # Test_Bindings.cpp Test_DynamicDispatcher.cpp + Test_DefaultKernelsForStateVector.cpp + Test_GateImplementations_CompareKernels.cpp Test_GateImplementations_Generator.cpp Test_GateImplementations_Inverse.cpp Test_GateImplementations_Matrix.cpp @@ -80,7 +83,7 @@ set(TEST_SOURCES Test_AdjDiff.cpp Test_Internal.cpp Test_Measures.cpp Test_OpToMemberFuncPtr.cpp - Test_StateVectorManaged.cpp + Test_StateVectorCPU.cpp Test_StateVectorRaw.cpp Test_Util.cpp Test_VectorJacobianProduct.cpp) diff --git a/pennylane_lightning/src/tests/CreateAllWires.cpp b/pennylane_lightning/src/tests/CreateAllWires.cpp new file mode 100644 index 0000000000..43a7e80ce4 --- /dev/null +++ b/pennylane_lightning/src/tests/CreateAllWires.cpp @@ -0,0 +1,31 @@ +#include "CreateAllWires.hpp" +namespace Pennylane { +auto crateAllWires(size_t n_qubits, Gates::GateOperation gate_op, bool order) + -> std::vector> { + + if (Util::array_has_elt(Gates::Constant::multi_qubit_gates, gate_op)) { + // make all possible 2^N permutations + std::vector> res; + res.reserve((1U << n_qubits) - 1); + ; + for (size_t k = 1; k < (1U << n_qubits); k++) { + std::vector wires; + wires.reserve(Util::popcount(k)); + + for (size_t i = 0; i < n_qubits; i++) { + if (((k >> i) & 1) == 1) { + wires.emplace_back(i); + } + } + + res.push_back(wires); + } + return res; + } // else + const size_t n_wires = Util::lookup(Gates::Constant::gate_wires, gate_op); + if (order) { + return PermutationGenerator(n_qubits, n_wires).all_perms(); + } // else + return CombinationGenerator(n_qubits, n_wires).all_perms(); +} +} // namespace Pennylane diff --git a/pennylane_lightning/src/tests/CreateAllWires.hpp b/pennylane_lightning/src/tests/CreateAllWires.hpp new file mode 100644 index 0000000000..54d3cd9e9a --- /dev/null +++ b/pennylane_lightning/src/tests/CreateAllWires.hpp @@ -0,0 +1,92 @@ +#pragma once +#include "BitUtil.hpp" +#include "Constant.hpp" +#include "ConstantUtil.hpp" +#include "GateOperation.hpp" 
+ +#include +#include + +namespace Pennylane { + +class WiresGenerator { + public: + [[nodiscard]] virtual auto all_perms() const + -> const std::vector> & = 0; +}; +class CombinationGenerator : public WiresGenerator { + private: + std::vector v_; + std::vector> all_perms_; + + public: + void comb(size_t n, size_t r) { + if (r == 0) { + all_perms_.push_back(v_); + return; + } + if (n < r) { + return; + } + + v_[r - 1] = n - 1; + comb(n - 1, r - 1); + + comb(n - 1, r); + } + + CombinationGenerator(size_t n, size_t r) { + v_.resize(r); + comb(n, r); + } + + [[nodiscard]] auto all_perms() const + -> const std::vector> & override { + return all_perms_; + } +}; +class PermutationGenerator : public WiresGenerator { + private: + std::vector> all_perms_; + std::vector available_elts_; + std::vector v; + + public: + void perm(size_t n, size_t r) { + if (r == 0) { + all_perms_.push_back(v); + return; + } + for (size_t i = 0; i < n; i++) { + v[r - 1] = available_elts_[i]; + std::swap(available_elts_[n - 1], available_elts_[i]); + perm(n - 1, r - 1); + std::swap(available_elts_[n - 1], available_elts_[i]); + } + } + + PermutationGenerator(size_t n, size_t r) { + v.resize(r); + + available_elts_.resize(n); + std::iota(available_elts_.begin(), available_elts_.end(), 0); + perm(n, r); + } + + [[nodiscard]] auto all_perms() const + -> const std::vector> & override { + return all_perms_; + } +}; + +/** + * @brief Create all possible combination of wires + * for a given number of qubits and gate operation + * + * @param n_qubits Number of qubits + * @param gate_op Gate operation + * @param order Whether the ordering matters (if true, permutation is used) + */ +auto crateAllWires(size_t n_qubits, Gates::GateOperation gate_op, bool order) + -> std::vector>; +} // namespace Pennylane diff --git a/pennylane_lightning/src/tests/TestHelpers.hpp b/pennylane_lightning/src/tests/TestHelpers.hpp index 74faeeb5ce..723d03b10a 100644 --- a/pennylane_lightning/src/tests/TestHelpers.hpp +++ 
b/pennylane_lightning/src/tests/TestHelpers.hpp @@ -1,17 +1,20 @@ -#include -#include -#include -#include -#include -#include - #include "Constant.hpp" #include "ConstantUtil.hpp" #include "Error.hpp" #include "GateOperation.hpp" #include "LinearAlgebra.hpp" +#include "Macros.hpp" +#include "Memory.hpp" +#include "TestKernels.hpp" #include "Util.hpp" +#include +#include +#include +#include +#include +#include + #include namespace Pennylane { @@ -94,6 +97,20 @@ bool operator!=(const std::vector &lhs, return !rhs.compare(lhs); } +template +bool operator==(const std::vector &lhs, + const std::vector &rhs) { + if (lhs.size() != rhs.size()) { + return false; + } + for (size_t idx = 0; idx < lhs.size(); idx++) { + if (lhs[idx] != rhs[idx]) { + return false; + } + } + return true; +} + /** * @brief Utility function to compare complex statevector data. * @@ -132,6 +149,11 @@ isApproxEqual(const Data_t &data1, const Data_t &data2, data1.imag() != Approx(data2.imag()).epsilon(eps)); } +template +using TestVector = std::vector< + T, + PLAllocator, TestKernels>>>; + /** * @brief Multiplies every value in a dataset by a given complex scalar value. * @@ -140,8 +162,8 @@ isApproxEqual(const Data_t &data1, const Data_t &data2, * @param data Data to be scaled. * @param scalar Scalar value. */ -template -void scaleVector(std::vector> &data, +template +void scaleVector(std::vector, Alloc> &data, std::complex scalar) { std::transform( data.begin(), data.end(), data.begin(), @@ -156,8 +178,9 @@ void scaleVector(std::vector> &data, * @param data Data to be scaled. * @param scalar Scalar value. 
*/ -template -void scaleVector(std::vector> &data, Data_t scalar) { +template +void scaleVector(std::vector, Alloc> &data, + Data_t scalar) { std::transform( data.begin(), data.end(), data.begin(), [scalar](const std::complex &c) { return c * scalar; }); @@ -168,8 +191,8 @@ void scaleVector(std::vector> &data, Data_t scalar) { */ template auto createZeroState(size_t num_qubits) - -> std::vector> { - std::vector> res(1U << num_qubits, {0.0, 0.0}); + -> TestVector> { + TestVector> res(1U << num_qubits, {0.0, 0.0}); res[0] = std::complex{1.0, 0.0}; return res; } @@ -179,8 +202,8 @@ auto createZeroState(size_t num_qubits) */ template auto createPlusState(size_t num_qubits) - -> std::vector> { - std::vector> res(1U << num_qubits, {1.0, 0.0}); + -> TestVector> { + TestVector> res(1U << num_qubits, {1.0, 0.0}); for (auto &elt : res) { elt /= std::sqrt(1U << num_qubits); } @@ -204,8 +227,8 @@ auto squaredNorm(const std::complex *data, size_t data_size) */ template auto createRandomState(RandomEngine &re, size_t num_qubits) - -> std::vector> { - std::vector> res(1U << num_qubits, {0.0, 0.0}); + -> TestVector> { + TestVector> res(1U << num_qubits, {0.0, 0.0}); std::uniform_real_distribution dist; for (size_t idx = 0; idx < (1U << num_qubits); idx++) { res[idx] = {dist(re), dist(re)}; @@ -221,9 +244,11 @@ auto createRandomState(RandomEngine &re, size_t num_qubits) * * Example: createProductState("+01") will produce |+01> state. 
*/ -template auto createProductState(std::string_view str) { +template +auto createProductState(std::string_view str) + -> TestVector> { using Pennylane::Util::INVSQRT2; - std::vector> st; + TestVector> st; st.resize(1U << str.length()); std::vector zero{1.0, 0.0}; @@ -261,11 +286,13 @@ template auto createProductState(std::string_view str) { return st; } -inline auto createWires(Gates::GateOperation op) -> std::vector { +inline auto createWires(Gates::GateOperation op, size_t num_qubits) + -> std::vector { if (Pennylane::Util::array_has_elt(Gates::Constant::multi_qubit_gates, op)) { - // if multi-qubit gates - return {0, 1, 2}; + std::vector wires(num_qubits); + std::iota(wires.begin(), wires.end(), 0); + return wires; } switch (Pennylane::Util::lookup(Gates::Constant::gate_wires, op)) { case 1: @@ -301,10 +328,10 @@ auto createParams(Gates::GateOperation op) -> std::vector { */ template auto randomUnitary(RandomEngine &re, size_t num_qubits) - -> std::vector> { + -> TestVector> { using ComplexPrecisionT = std::complex; const size_t dim = (1U << num_qubits); - std::vector res(dim * dim, ComplexPrecisionT{}); + TestVector res(dim * dim, ComplexPrecisionT{}); std::normal_distribution dist; @@ -339,7 +366,7 @@ auto randomUnitary(RandomEngine &re, size_t num_qubits) ComplexPrecisionT *row_p = res.data() + row * dim; PrecisionT norm2 = std::sqrt(squaredNorm(row_p, dim)); - // noramlize row2 + // normalize row2 std::transform(row_p, row_p + dim, row_p, [norm2](const auto c) { return (static_cast(1.0) / norm2) * c; }); diff --git a/pennylane_lightning/src/tests/TestKernels.hpp b/pennylane_lightning/src/tests/TestKernels.hpp index e9b9cfa785..9e83198073 100644 --- a/pennylane_lightning/src/tests/TestKernels.hpp +++ b/pennylane_lightning/src/tests/TestKernels.hpp @@ -1,13 +1,15 @@ #pragma once /** - * @brief We define test kernels. Note that kernels not registered to + * @file + * We define test kernels. 
Note that kernels not registered to * AvailableKernels can be also tested by adding it to here. */ -#include "GateImplementationsLM.hpp" -#include "GateImplementationsPI.hpp" - +#include "Macros.hpp" #include "TypeList.hpp" +#include "cpu_kernels/GateImplementationsLM.hpp" +#include "cpu_kernels/GateImplementationsPI.hpp" + using TestKernels = Pennylane::Util::TypeList; + Pennylane::Gates::GateImplementationsPI, void>; diff --git a/pennylane_lightning/src/tests/Test_AdjDiff.cpp b/pennylane_lightning/src/tests/Test_AdjDiff.cpp index dfbac67214..6a05a36018 100644 --- a/pennylane_lightning/src/tests/Test_AdjDiff.cpp +++ b/pennylane_lightning/src/tests/Test_AdjDiff.cpp @@ -13,7 +13,7 @@ #include #include "AdjointDiff.hpp" -#include "StateVectorRaw.hpp" +#include "StateVectorCPU.hpp" #include "Util.hpp" #include "TestHelpers.hpp" @@ -50,10 +50,12 @@ TEST_CASE("AdjointJacobian::adjointJacobian Op=RX, Obs=Z", for (const auto &p : param) { auto ops = OpsData({"RX"}, {{p}}, {{0}}, {false}); + /* std::vector> cdata(0b1 << num_qubits); cdata[0] = std::complex{1, 0}; + */ - StateVectorRaw psi(cdata.data(), cdata.size()); + StateVectorCPU psi(num_qubits); std::vector tp{0}; std::vector> obs_ls{obs}; @@ -82,10 +84,7 @@ TEST_CASE("AdjointJacobian::adjointJacobian Op=RY, Obs=X", for (const auto &p : param) { auto ops = OpsData({"RY"}, {{p}}, {{0}}, {false}); - std::vector> cdata(0b1 << num_qubits); - cdata[0] = std::complex{1, 0}; - - StateVectorRaw psi(cdata.data(), cdata.size()); + StateVectorCPU psi(num_qubits); std::vector tp{0}; std::vector> obs_ls{obs}; @@ -109,9 +108,7 @@ TEST_CASE("AdjointJacobian::adjointJacobian Op=RX, Obs=[Z,Z]", const size_t num_obs = 2; std::vector jacobian(num_obs * num_params, 0); - std::vector> cdata(0b1 << num_qubits); - StateVectorRaw psi(cdata.data(), cdata.size()); - cdata[0] = std::complex{1, 0}; + StateVectorCPU psi(num_qubits); auto obs1 = ObsDatum({"PauliZ"}, {{}}, {{0}}); auto obs2 = ObsDatum({"PauliZ"}, {{}}, {{1}}); @@ -140,9 +137,7 @@ 
TEST_CASE("AdjointJacobian::adjointJacobian Op=[RX,RX,RX], Obs=[Z,Z,Z]", const size_t num_obs = 3; std::vector jacobian(num_obs * num_params, 0); - std::vector> cdata(0b1 << num_qubits); - StateVectorRaw psi(cdata.data(), cdata.size()); - cdata[0] = std::complex{1, 0}; + StateVectorCPU psi(num_qubits); auto obs1 = ObsDatum({"PauliZ"}, {{}}, {{0}}); auto obs2 = ObsDatum({"PauliZ"}, {{}}, {{1}}); @@ -179,9 +174,7 @@ TEST_CASE("AdjointJacobian::adjointJacobian Op=[RX,RX,RX], Obs=[Z,Z,Z], " std::vector jacobian(num_obs * num_params, 0); std::vector t_params{0, 2}; - std::vector> cdata(0b1 << num_qubits); - StateVectorRaw psi(cdata.data(), cdata.size()); - cdata[0] = std::complex{1, 0}; + StateVectorCPU psi(num_qubits); auto obs1 = ObsDatum({"PauliZ"}, {{}}, {{0}}); auto obs2 = ObsDatum({"PauliZ"}, {{}}, {{1}}); @@ -214,9 +207,7 @@ TEST_CASE("AdjointJacobian::adjointJacobian Op=[RX,RX,RX], Obs=[ZZZ]", const size_t num_obs = 1; std::vector jacobian(num_obs * num_params, 0); - std::vector> cdata(0b1 << num_qubits); - StateVectorRaw psi(cdata.data(), cdata.size()); - cdata[0] = std::complex{1, 0}; + StateVectorCPU psi(num_qubits); auto obs = ObsDatum({"PauliZ", "PauliZ", "PauliZ"}, {{}, {}, {}}, {{0}, {1}, {2}}); @@ -249,9 +240,7 @@ TEST_CASE("AdjointJacobian::adjointJacobian Op=Mixed, Obs=[XXX]", const size_t num_obs = 1; std::vector jacobian(num_obs * num_params, 0); - std::vector> cdata(0b1 << num_qubits); - StateVectorRaw psi(cdata.data(), cdata.size()); - cdata[0] = std::complex{1, 0}; + StateVectorCPU psi(num_qubits); auto obs = ObsDatum({"PauliX", "PauliX", "PauliX"}, {{}, {}, {}}, {{0}, {1}, {2}}); @@ -312,7 +301,7 @@ TEST_CASE("AdjointJacobian::adjointJacobian Decomposed Rot gate, non " std::vector> cdata{INVSQRT2(), -INVSQRT2()}; - StateVectorRaw psi(cdata.data(), cdata.size()); + StateVectorCPU psi(cdata.data(), cdata.size()); auto obs = ObsDatum({"PauliZ"}, {{}}, {{0}}); auto ops = OpsData( @@ -353,7 +342,7 @@ TEST_CASE("AdjointJacobian::adjointJacobian Mixed 
Ops, Obs and TParams", std::vector> cdata{ONE(), ZERO(), ZERO(), ZERO()}; - StateVectorRaw psi(cdata.data(), cdata.size()); + StateVectorCPU psi(cdata); auto obs = ObsDatum({"PauliX", "PauliZ"}, {{}, {}}, {{0}, {1}}); auto ops = OpsData( @@ -388,4 +377,4 @@ TEST_CASE("AdjointJacobian::adjointJacobian Mixed Ops, Obs and TParams", CHECK(expected[1] == Approx(jacobian[1])); CHECK(expected[2] == Approx(jacobian[2])); } -} \ No newline at end of file +} diff --git a/pennylane_lightning/src/tests/Test_DefaultKernelsForStateVector.cpp b/pennylane_lightning/src/tests/Test_DefaultKernelsForStateVector.cpp new file mode 100644 index 0000000000..aadc5426d0 --- /dev/null +++ b/pennylane_lightning/src/tests/Test_DefaultKernelsForStateVector.cpp @@ -0,0 +1,32 @@ +#include "Constant.hpp" +#include "ConstantUtil.hpp" +#include "DefaultKernelsForStateVector.hpp" +#include "Util.hpp" + +#include + +using namespace Pennylane; + +TEST_CASE("Test default kernels for gates are well defined", + "[Test_DefaultKernelsForStateVector]") { + auto &instance = DefaultKernelsForStateVector::getInstance(); + Util::for_each_enum( + [&instance](Threading threading, CPUMemoryModel memory_model) { + for (size_t num_qubits = 1; num_qubits < 27; num_qubits++) { + REQUIRE_NOTHROW(instance.getGateKernelMap(num_qubits, threading, + memory_model)); + } + }); +} + +TEST_CASE("Test default kernels for generators are well defined", + "[Test_DefaultKernelsForStateVector]") { + auto &instance = DefaultKernelsForStateVector::getInstance(); + Util::for_each_enum( + [&instance](Threading threading, CPUMemoryModel memory_model) { + for (size_t num_qubits = 1; num_qubits < 27; num_qubits++) { + REQUIRE_NOTHROW(instance.getGeneratorKernelMap( + num_qubits, threading, memory_model)); + } + }); +} diff --git a/pennylane_lightning/src/tests/Test_DynamicDispatcher.cpp b/pennylane_lightning/src/tests/Test_DynamicDispatcher.cpp index f4dcf3b4c2..0146f99c35 100644 --- 
a/pennylane_lightning/src/tests/Test_DynamicDispatcher.cpp +++ b/pennylane_lightning/src/tests/Test_DynamicDispatcher.cpp @@ -36,12 +36,10 @@ struct testDispatchForKernel { Util::array_has_elt(GateImplementation::implemented_gates, gate_op), bool> = true> static void test(RandomEngine &re, size_t num_qubits) { - using CFP_t = std::complex; - const std::vector ini_st = - createRandomState(re, num_qubits); - std::vector expected = ini_st; + const auto ini_st = createRandomState(re, num_qubits); + auto expected = ini_st; - const auto wires = createWires(gate_op); + const auto wires = createWires(gate_op, num_qubits); const auto params = createParams(gate_op); // We first calculate expected directly calling a static member function diff --git a/pennylane_lightning/src/tests/Test_GateImplementations_CompareKernels.cpp b/pennylane_lightning/src/tests/Test_GateImplementations_CompareKernels.cpp new file mode 100644 index 0000000000..30d6894b08 --- /dev/null +++ b/pennylane_lightning/src/tests/Test_GateImplementations_CompareKernels.cpp @@ -0,0 +1,185 @@ +#include "CreateAllWires.hpp" +#include "TestHelpers.hpp" + +#include "OpToMemberFuncPtr.hpp" +#include "TestKernels.hpp" +#include "Util.hpp" + +#include + +#include +#include +#include +#include +#include +#include +#include + +/** + * @file Test_GateImplementations_Nonparam.cpp + * + * This file tests all gate operations (besides matrix) by comparing results + * between different kernels (gate implementations). 
+ */ +using namespace Pennylane; +using namespace Pennylane::Gates; +using namespace Pennylane::Util; + +namespace { +using std::vector; +} + +template std::string kernelsToString() { + if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { + return std::string(TypeList::Type::name) + ", " + + kernelsToString(); + } + return std::string(TypeList::Type::name); + } +} + +/* Type transformation */ +template +struct KernelsImplementingGateHelper { + using Type = std::conditional_t< + array_has_elt(TypeList::Type::implemented_gates, gate_op), + typename PrependToTypeList< + typename TypeList::Type, + typename KernelsImplementingGateHelper< + gate_op, typename TypeList::Next>::Type>::Type, + typename KernelsImplementingGateHelper::Type>; +}; +template +struct KernelsImplementingGateHelper { + using Type = void; +}; + +template struct KernelsImplementingGate { + using Type = + typename KernelsImplementingGateHelper::Type; +}; + +/** + * @brief Apply the given gate operation with the given gate implementation. + * + * @tparam gate_op Gate operation to test + * @tparam PrecisionT Floating point data type for statevector + * @tparam ParamT Floating point data type for parameter + * @tparam GateImplementation Gate implementation class + * @param ini Initial statevector + * @param num_qubits Number of qubits + * @param wires Wires the gate applies to + * @param inverse Whether to use inverse of gate + * @param params Paramters for gate + */ +template +auto applyGate(TestVector> ini, size_t num_qubits, + const std::vector &wires, bool inverse, + const std::vector ¶ms) + -> TestVector> { + callGateOps(GateOpToMemberFuncPtr::value, + ini.data(), num_qubits, wires, inverse, params); + return ini; +} + +/** + * @brief Apply the given gate using all implementing kernels and return + * results in tuple. 
+ */ +template +auto applyGateForImplemetingKernels( + const TestVector> &ini, size_t num_qubits, + const std::vector &wires, bool inverse, + const std::vector ¶ms, + [[maybe_unused]] std::index_sequence dummy) { + return std::make_tuple( + applyGate>( + ini, num_qubits, wires, inverse, params)...); +} + +template +void testApplyGate(RandomEngine &re, size_t num_qubits) { + const auto ini = createRandomState(re, num_qubits); + + using Kernels = typename KernelsImplementingGate::Type; + + INFO("Kernels implementing " << lookup(Constant::gate_names, gate_op) + << " are " << kernelsToString()); + + INFO("PrecisionT, ParamT = " << PrecisionToName::value << ", " + << PrecisionToName::value); + + if constexpr (gate_op != GateOperation::Matrix) { + const auto all_wires = crateAllWires(num_qubits, gate_op, true); + for (const auto &wires : all_wires) { + const auto params = createParams(gate_op); + const auto gate_name = lookup(Constant::gate_names, gate_op); + DYNAMIC_SECTION( + "Test gate " + << gate_name + << " with inverse = false") { // Test with inverse = false + const auto results = Util::tuple_to_array( + applyGateForImplemetingKernels( + ini, num_qubits, wires, false, params, + std::make_index_sequence()>())); + + for (size_t i = 0; i < results.size() - 1; i++) { + REQUIRE(results[i] == + PLApprox(results[i + 1]).margin(1e-7)); + } + } + + DYNAMIC_SECTION( + "Test gate " + << gate_name + << " with inverse = true") { // Test with inverse = true + const auto results = Util::tuple_to_array( + applyGateForImplemetingKernels( + ini, num_qubits, wires, true, params, + std::make_index_sequence()>())); + + for (size_t i = 0; i < results.size() - 1; i++) { + REQUIRE(results[i] == + PLApprox(results[i + 1]).margin(1e-7)); + } + } + } + } +} + +template +void testAllGatesIter(RandomEngine &re, size_t max_num_qubits) { + if constexpr (gate_idx < static_cast(GateOperation::END)) { + constexpr static auto gate_op = static_cast(gate_idx); + + size_t min_num_qubits = + 
array_has_elt(Constant::multi_qubit_gates, gate_op) + ? 1 + : lookup(Constant::gate_wires, gate_op); + for (size_t num_qubits = min_num_qubits; num_qubits < max_num_qubits; + num_qubits++) { + testApplyGate(re, num_qubits); + } + testAllGatesIter(re, max_num_qubits); + } +} + +template +void testAllGates(RandomEngine &re, size_t max_num_qubits) { + testAllGatesIter<0, PrecisionT, ParamT>(re, max_num_qubits); +} + +TEMPLATE_TEST_CASE("Test all kernels give the same results", + "[Test_GateImplementations_CompareKernels]", float, double) { + std::mt19937 re{1337}; + testAllGates(re, 6); +} diff --git a/pennylane_lightning/src/tests/Test_GateImplementations_Generator.cpp b/pennylane_lightning/src/tests/Test_GateImplementations_Generator.cpp index 377c45bd5f..2e9cd9cdcb 100644 --- a/pennylane_lightning/src/tests/Test_GateImplementations_Generator.cpp +++ b/pennylane_lightning/src/tests/Test_GateImplementations_Generator.cpp @@ -83,7 +83,7 @@ void testGeneratorForGate(RandomEngine &re, size_t num_qubits) { DYNAMIC_SECTION("Test generator of " << gate_name << " for kernel " << GateImplementation::name) { - const auto wires = createWires(gate_op); + const auto wires = createWires(gate_op, num_qubits); const auto ini_st = createRandomState(re, num_qubits); auto gntr_func = diff --git a/pennylane_lightning/src/tests/Test_GateImplementations_Inverse.cpp b/pennylane_lightning/src/tests/Test_GateImplementations_Inverse.cpp index fb172dafdb..19ffb8535b 100644 --- a/pennylane_lightning/src/tests/Test_GateImplementations_Inverse.cpp +++ b/pennylane_lightning/src/tests/Test_GateImplementations_Inverse.cpp @@ -38,7 +38,7 @@ void testInverseKernelGate(RandomEngine &re, size_t num_qubits) { GateOpToMemberFuncPtr::value; - const auto wires = createWires(gate_op); + const auto wires = createWires(gate_op, num_qubits); const auto params = createParams(gate_op); callGateOps(func_ptr, st.data(), num_qubits, wires, false, params); diff --git 
a/pennylane_lightning/src/tests/Test_GateImplementations_Nonparam.cpp b/pennylane_lightning/src/tests/Test_GateImplementations_Nonparam.cpp index 86894a47f7..85772294ff 100644 --- a/pennylane_lightning/src/tests/Test_GateImplementations_Nonparam.cpp +++ b/pennylane_lightning/src/tests/Test_GateImplementations_Nonparam.cpp @@ -71,13 +71,18 @@ using std::vector; template void testApplyPauliX() { const size_t num_qubits = 3; - for (size_t index = 0; index < num_qubits; index++) { - auto st = createZeroState(num_qubits); - CHECK(st[0] == Util::ONE()); - - GateImplementation::applyPauliX(st.data(), num_qubits, {index}, false); - CHECK(st[0] == Util::ZERO()); - CHECK(st[0b1 << (num_qubits - index - 1)] == Util::ONE()); + DYNAMIC_SECTION(GateImplementation::name + << ", PauliX - " << PrecisionToName::value) { + for (size_t index = 0; index < num_qubits; index++) { + auto st = createZeroState(num_qubits); + CHECK(st[0] == Util::ONE()); + + GateImplementation::applyPauliX(st.data(), num_qubits, {index}, + false); + CHECK(st[0] == Util::ZERO()); + CHECK(st[0b1 << (num_qubits - index - 1)] == + Util::ONE()); + } } } PENNYLANE_RUN_TEST(PauliX); @@ -133,23 +138,20 @@ PENNYLANE_RUN_TEST(PauliZ); template void testApplyHadamard() { - using ComplexPrecisionT = std::complex; const size_t num_qubits = 3; for (size_t index = 0; index < num_qubits; index++) { auto st = createZeroState(num_qubits); - CHECK(st[0] == ComplexPrecisionT{1, 0}); GateImplementation::applyHadamard(st.data(), num_qubits, {index}, false); - ComplexPrecisionT expected(1 / std::sqrt(2), 0); - CHECK(expected.real() == Approx(st[0].real())); - CHECK(expected.imag() == Approx(st[0].imag())); - - CHECK(expected.real() == - Approx(st[0b1 << (num_qubits - index - 1)].real())); - CHECK(expected.imag() == - Approx(st[0b1 << (num_qubits - index - 1)].imag())); + std::vector expected_string; + expected_string.resize(num_qubits); + std::fill(expected_string.begin(), expected_string.end(), '0'); + expected_string[index] = 
'+'; + const auto expected = createProductState( + std::string_view{expected_string.data(), num_qubits}); + CHECK(expected == PLApprox(st)); } } PENNYLANE_RUN_TEST(Hadamard); @@ -205,17 +207,40 @@ PENNYLANE_RUN_TEST(T); template void testApplyCNOT() { const size_t num_qubits = 3; - auto st = createZeroState(num_qubits); - // Test using |+00> state to generate 3-qubit GHZ state - GateImplementation::applyHadamard(st.data(), num_qubits, {0}, false); + SECTION("CNOT0,1 |000> = |000>") { + const auto ini_st = createProductState("000"); + auto st = ini_st; + GateImplementation::applyCNOT(st.data(), num_qubits, {0, 1}, false); + CHECK(st == ini_st); + } + + SECTION("CNOT0,1 |100> = |110>") { + const auto ini_st = createProductState("100"); + auto st = ini_st; + GateImplementation::applyCNOT(st.data(), num_qubits, {0, 1}, false); + CHECK(st == + PLApprox(createProductState("110")).margin(1e-7)); + } + SECTION("CNOT1,2 |110> = |111>") { + const auto ini_st = createProductState("110"); + auto st = ini_st; + GateImplementation::applyCNOT(st.data(), num_qubits, {1, 2}, false); + CHECK(st == + PLApprox(createProductState("111")).margin(1e-7)); + } + + SECTION("Generate GHZ state") { + auto st = createProductState("+00"); - for (size_t index = 1; index < num_qubits; index++) { - GateImplementation::applyCNOT(st.data(), num_qubits, {index - 1, index}, - false); + // Test using |+00> state to generate 3-qubit GHZ state + for (size_t index = 1; index < num_qubits; index++) { + GateImplementation::applyCNOT(st.data(), num_qubits, + {index - 1, index}, false); + } + CHECK(st.front() == Util::INVSQRT2()); + CHECK(st.back() == Util::INVSQRT2()); } - CHECK(st.front() == Util::INVSQRT2()); - CHECK(st.back() == Util::INVSQRT2()); } PENNYLANE_RUN_TEST(CNOT); @@ -223,11 +248,8 @@ PENNYLANE_RUN_TEST(CNOT); template void testApplyCY() { using ComplexPrecisionT = std::complex; const size_t num_qubits = 3; - auto ini_st = createZeroState(num_qubits); - - // Test using |+10> state - 
GateImplementation::applyHadamard(ini_st.data(), num_qubits, {0}, false); - GateImplementation::applyPauliX(ini_st.data(), num_qubits, {1}, false); + auto ini_st = + createProductState("+10"); // Test using |+10> state CHECK(ini_st == std::vector{ Util::ZERO(), Util::ZERO(), @@ -299,20 +321,7 @@ template void testApplyCZ() { using ComplexPrecisionT = std::complex; const size_t num_qubits = 3; - auto ini_st = createZeroState(num_qubits); - - // Test using |+10> state - GateImplementation::applyHadamard(ini_st.data(), num_qubits, {0}, false); - GateImplementation::applyPauliX(ini_st.data(), num_qubits, {1}, false); - - auto st = ini_st; - CHECK(st == std::vector{ - Util::ZERO(), Util::ZERO(), - std::complex(1.0 / sqrt(2), 0), - Util::ZERO(), Util::ZERO(), - Util::ZERO(), - std::complex(1.0 / sqrt(2), 0), - Util::ZERO()}); + auto ini_st = createProductState("+10"); DYNAMIC_SECTION(GateImplementation::name << ", CZ0,1 |+10> -> |-10> - " @@ -340,7 +349,7 @@ template void testApplyCZ() { DYNAMIC_SECTION(GateImplementation::name << ", CZ0,2 |+10> -> |+10> - " << PrecisionToName::value) { - const std::vector &expected{ini_st}; + const auto &expected = ini_st; auto sv02 = ini_st; auto sv20 = ini_st; @@ -354,7 +363,7 @@ template void testApplyCZ() { DYNAMIC_SECTION(GateImplementation::name << ", CZ1,2 |+10> -> |+10> - " << PrecisionToName::value) { - const std::vector &expected{ini_st}; + const auto &expected = ini_st; auto sv12 = ini_st; auto sv21 = ini_st; @@ -372,11 +381,9 @@ PENNYLANE_RUN_TEST(CZ); template void testApplySWAP() { using ComplexPrecisionT = std::complex; const size_t num_qubits = 3; - auto ini_st = createZeroState(num_qubits); + auto ini_st = createProductState("+10"); // Test using |+10> state - GateImplementation::applyHadamard(ini_st.data(), num_qubits, {0}, false); - GateImplementation::applyPauliX(ini_st.data(), num_qubits, {1}, false); CHECK(ini_st == std::vector{ Util::ZERO(), Util::ZERO(), @@ -461,12 +468,9 @@ template void testApplyToffoli() { 
using ComplexPrecisionT = std::complex; const size_t num_qubits = 3; - auto ini_st = createZeroState(num_qubits); + auto ini_st = createProductState("+10"); // Test using |+10> state - GateImplementation::applyHadamard(ini_st.data(), num_qubits, {0}, false); - GateImplementation::applyPauliX(ini_st.data(), num_qubits, {1}, false); - DYNAMIC_SECTION(GateImplementation::name << ", Toffoli 0,1,2 |+10> -> |010> + |111> - " << PrecisionToName::value) { @@ -539,11 +543,8 @@ template void testApplyCSWAP() { using ComplexPrecisionT = std::complex; const size_t num_qubits = 3; - auto ini_st = createZeroState(num_qubits); - - // Test using |+10> state - GateImplementation::applyHadamard(ini_st.data(), num_qubits, {0}, false); - GateImplementation::applyPauliX(ini_st.data(), num_qubits, {1}, false); + auto ini_st = + createProductState("+10"); // Test using |+10> state DYNAMIC_SECTION(GateImplementation::name << ", CSWAP 0,1,2 |+10> -> |010> + |101> - " diff --git a/pennylane_lightning/src/tests/Test_GateImplementations_Param.cpp b/pennylane_lightning/src/tests/Test_GateImplementations_Param.cpp index cc6f687e11..8e594fe3d6 100644 --- a/pennylane_lightning/src/tests/Test_GateImplementations_Param.cpp +++ b/pennylane_lightning/src/tests/Test_GateImplementations_Param.cpp @@ -164,8 +164,8 @@ void testApplyRY() { {0.10575112905629831, -0.47593196040758534}, {-0.8711876098966215, -0.0577721051072477}}}; - const std::vector init_state{ - {0.8775825618903728, 0.0}, {0.0, -0.47942553860420306}}; + const TestVector init_state{{0.8775825618903728, 0.0}, + {0.0, -0.47942553860420306}}; DYNAMIC_SECTION(GateImplementation::name << ", RY - " << PrecisionToName::value) { for (size_t index = 0; index < angles.size(); index++) { @@ -222,6 +222,14 @@ void testApplyRZ() { CHECK(st == PLApprox(expected_results[index])); } + + for (size_t index = 0; index < num_qubits; index++) { + auto st = createPlusState(num_qubits); + + GateImplementation::applyRZ(st.data(), num_qubits, {index}, true, + 
{-angles[index]}); + CHECK(st == PLApprox(expected_results[index])); + } } PENNYLANE_RUN_TEST(RZ); @@ -364,7 +372,7 @@ void testApplyIsingXX() { << ", IsingXX0,2 - " << PrecisionToName::value) { const size_t num_qubits = 3; - std::vector ini_st{ + const auto ini_st = TestVector{ ComplexPrecisionT{0.125681356503, 0.252712197380}, ComplexPrecisionT{0.262591068130, 0.370189000494}, ComplexPrecisionT{0.129300299863, 0.371057794075}, @@ -498,7 +506,7 @@ void testApplyIsingYY() { << PrecisionToName::value) { const size_t num_qubits = 4; - std::vector ini_st{ + const auto ini_st = TestVector{ ComplexPrecisionT{0.276522701942, 0.192601873155}, ComplexPrecisionT{0.035951282872, 0.224882549474}, ComplexPrecisionT{0.142578003191, 0.016769549184}, @@ -652,7 +660,7 @@ void testApplyIsingZZ() { << PrecisionToName::value) { const size_t num_qubits = 4; - std::vector ini_st{ + TestVector ini_st{ ComplexPrecisionT{0.267462841882, 0.010768564798}, ComplexPrecisionT{0.228575129706, 0.010564590956}, ComplexPrecisionT{0.099492749900, 0.260849823392}, diff --git a/pennylane_lightning/src/tests/Test_Internal.cpp b/pennylane_lightning/src/tests/Test_Internal.cpp index 00ead21271..819f472586 100644 --- a/pennylane_lightning/src/tests/Test_Internal.cpp +++ b/pennylane_lightning/src/tests/Test_Internal.cpp @@ -1,6 +1,8 @@ -#include "GateImplementationsPI.hpp" +#include "CreateAllWires.hpp" #include "TestHelpers.hpp" +#include "cpu_kernels/GateImplementationsPI.hpp" +#include #include #include @@ -95,8 +97,7 @@ TEMPLATE_TEST_CASE("randomUnitary", "[Test_Internal]", float, double) { const size_t dim = (1U << num_qubits); const auto unitary = randomUnitary(re, num_qubits); - std::vector> unitary_dagger = - Util::Transpose(unitary, dim, dim); + auto unitary_dagger = Util::Transpose(unitary, dim, dim); std::transform( unitary_dagger.begin(), unitary_dagger.end(), unitary_dagger.begin(), @@ -115,3 +116,79 @@ TEMPLATE_TEST_CASE("randomUnitary", "[Test_Internal]", float, double) { REQUIRE(mat == 
PLApprox(identity).margin(1e-5)); } } + +size_t binomialCeff(size_t n, size_t r) { + size_t num = 1; + size_t dem = 1; + for (size_t k = 0; k < r; k++) { + num *= (n - k); + } + for (size_t k = 1; k <= r; k++) { + dem *= k; + } + return num / dem; +} + +size_t permSize(size_t n, size_t r) { + size_t res = 1; + for (size_t k = 0; k < r; k++) { + res *= (n - k); + } + return res; +} + +/** + * @brief Test create all wires + */ +TEST_CASE("createAllWires", "[Test_Internal]") { + + SECTION("order = false") { + const std::vector> test_pairs{ + {4, 2}, {8, 3}, {12, 1}, {12, 2}, {12, 3}, {12, 4}, {12, 5}, + {12, 6}, {12, 7}, {12, 8}, {12, 9}, {12, 10}, {12, 11}, {12, 12}}; + + for (const auto [n, r] : test_pairs) { + std::vector> vec; + auto v = CombinationGenerator(n, r).all_perms(); + + REQUIRE(v.size() == binomialCeff(n, r)); + for (const auto &perm : v) { + REQUIRE(perm.size() == r); + vec.emplace_back(perm.begin(), perm.end()); + } + + std::sort(v.begin(), v.end(), + [](const std::vector &v1, + const std::vector &v2) { + return std::lexicographical_compare( + v1.begin(), v1.end(), v2.begin(), v2.end()); + }); // sort lexicographically + for (size_t i = 0; i < v.size() - 1; i++) { + REQUIRE(v[i] != v[i + 1]); // all combinations must be different + } + } + } + SECTION("order = true") { + const std::vector> test_pairs{ + {4, 2}, {8, 3}, {12, 1}, {12, 2}, {12, 3}, {12, 4}, {12, 5}}; + + for (const auto [n, r] : test_pairs) { + auto v = PermutationGenerator(n, r).all_perms(); + + REQUIRE(v.size() == permSize(n, r)); + for (const auto &perm : v) { + REQUIRE(perm.size() == r); + } + + std::sort(v.begin(), v.end(), + [](const std::vector &v1, + const std::vector &v2) { + return std::lexicographical_compare( + v1.begin(), v1.end(), v2.begin(), v2.end()); + }); // sort lexicographically + for (size_t i = 0; i < v.size() - 1; i++) { + REQUIRE(v[i] != v[i + 1]); // all permutations must be different + } + } + } +} diff --git a/pennylane_lightning/src/tests/Test_Measures.cpp 
b/pennylane_lightning/src/tests/Test_Measures.cpp index c72a9b68cf..b7ec1e8fd5 100644 --- a/pennylane_lightning/src/tests/Test_Measures.cpp +++ b/pennylane_lightning/src/tests/Test_Measures.cpp @@ -3,8 +3,7 @@ #include #include "Measures.hpp" -#include "StateVectorManaged.hpp" -#include "StateVectorRaw.hpp" +#include "StateVectorCPU.hpp" #include "Util.hpp" #include @@ -18,14 +17,14 @@ using std::string; using std::vector; }; // namespace -StateVectorManaged Initializing_StateVector() { +StateVectorCPU Initializing_StateVector() { // Defining a StateVector in a non-trivial configuration: size_t num_qubits = 3; size_t data_size = std::pow(2, num_qubits); std::vector> arr(data_size, 0); arr[0] = 1; - StateVectorManaged Measured_StateVector(arr.data(), data_size); + StateVectorCPU Measured_StateVector(arr.data(), data_size); std::vector wires; @@ -65,12 +64,11 @@ TEST_CASE("Probabilities", "[Measures]") { {1, 2}, {2, 1}, {0}, {1}, {2}}; // Defining the State Vector that will be measured. - StateVectorManaged Measured_StateVector = - Initializing_StateVector(); + StateVectorCPU Measured_StateVector = Initializing_StateVector(); // Initializing the measures class. // It will attach to the StateVector, allowing measures to keep been taken. - Measures> Measurer(Measured_StateVector); + Measures> Measurer(Measured_StateVector); vector probabilities; @@ -92,12 +90,11 @@ TEST_CASE("Probabilities", "[Measures]") { TEST_CASE("Expected Values", "[Measures]") { // Defining the State Vector that will be measured. - StateVectorManaged Measured_StateVector = - Initializing_StateVector(); + StateVectorCPU Measured_StateVector = Initializing_StateVector(); // Initializing the measures class. // It will attach to the StateVector, allowing measures to keep been taken. 
- Measures> Measurer(Measured_StateVector); + Measures> Measurer(Measured_StateVector); SECTION("Testing single operation defined by a matrix:") { vector> PauliX = {0, 1, 1, 0}; @@ -165,12 +162,11 @@ TEST_CASE("Expected Values", "[Measures]") { TEST_CASE("Variances", "[Measures]") { // Defining the State Vector that will be measured. - StateVectorManaged Measured_StateVector = - Initializing_StateVector(); + StateVectorCPU Measured_StateVector = Initializing_StateVector(); // Initializing the measures class. // It will attach to the StateVector, allowing measures to keep been taken. - Measures> Measurer(Measured_StateVector); + Measures> Measurer(Measured_StateVector); SECTION("Testing single operation defined by a matrix:") { vector> PauliX = {0, 1, 1, 0}; diff --git a/pennylane_lightning/src/tests/Test_StateVectorCPU.cpp b/pennylane_lightning/src/tests/Test_StateVectorCPU.cpp new file mode 100644 index 0000000000..17fd667c19 --- /dev/null +++ b/pennylane_lightning/src/tests/Test_StateVectorCPU.cpp @@ -0,0 +1,48 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "StateVectorCPU.hpp" +#include "StateVectorRaw.hpp" +#include "Util.hpp" + +#include "TestHelpers.hpp" + +using namespace Pennylane; + +TEMPLATE_TEST_CASE("StateVectorCPU::StateVectorCPU", "[StateVectorRaw]", float, + double) { + using fp_t = TestType; + + SECTION("StateVectorCPU") { + REQUIRE(!std::is_constructible_v>); + } + SECTION("StateVectorCPU") { + REQUIRE(!std::is_constructible_v>); + } + SECTION("StateVectorCPU {size_t}") { + REQUIRE(std::is_constructible_v, size_t>); + const size_t num_qubits = 4; + StateVectorCPU sv(num_qubits); + + REQUIRE(sv.getNumQubits() == 4); + REQUIRE(sv.getLength() == 16); + } + SECTION("StateVectorCPU {const StateVectorRaw&}") { + REQUIRE(std::is_constructible_v, + const StateVectorRaw &>); + } + SECTION("StateVectorCPU {const StateVectorCPU&}") { + REQUIRE(std::is_copy_constructible_v>); + } + 
SECTION("StateVectorCPU {StateVectorCPU&&}") { + REQUIRE(std::is_move_constructible_v>); + } +} diff --git a/pennylane_lightning/src/tests/Test_StateVectorManaged.cpp b/pennylane_lightning/src/tests/Test_StateVectorManaged.cpp deleted file mode 100644 index 3a30c68638..0000000000 --- a/pennylane_lightning/src/tests/Test_StateVectorManaged.cpp +++ /dev/null @@ -1,50 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "StateVectorManaged.hpp" -#include "StateVectorRaw.hpp" -#include "Util.hpp" - -#include "TestHelpers.hpp" - -using namespace Pennylane; - -TEMPLATE_TEST_CASE("StateVectorManaged::StateVectorManaged", "[StateVectorRaw]", - float, double) { - using fp_t = TestType; - - SECTION("StateVectorManaged") { - REQUIRE(std::is_constructible_v>); - } - SECTION("StateVectorManaged") { - REQUIRE(std::is_constructible_v>); - } - SECTION("StateVectorManaged {size_t}") { - REQUIRE(std::is_constructible_v>); - const size_t num_qubits = 4; - StateVectorManaged sv(num_qubits); - - REQUIRE(sv.getNumQubits() == 4); - REQUIRE(sv.getLength() == 16); - REQUIRE(sv.getDataVector().size() == 16); - } - SECTION("StateVectorManaged {const StateVectorRaw&}") { - REQUIRE(std::is_constructible_v, - const StateVectorRaw &>); - } - SECTION( - "StateVectorManaged {const StateVectorManaged&}") { - REQUIRE(std::is_copy_constructible_v>); - } - SECTION("StateVectorManaged {StateVectorManaged&&}") { - REQUIRE(std::is_move_constructible_v>); - } -} diff --git a/pennylane_lightning/src/tests/Test_Util.cpp b/pennylane_lightning/src/tests/Test_Util.cpp index 4360a793f5..2e804d05e2 100644 --- a/pennylane_lightning/src/tests/Test_Util.cpp +++ b/pennylane_lightning/src/tests/Test_Util.cpp @@ -567,6 +567,21 @@ TEST_CASE("Utility bit operations", "[Util][BitUtil]") { } } } + + SECTION("Bitswap") { + CHECK(Util::bitswap(0B001101, 0, 1) == 0B001110); + CHECK(Util::bitswap(0B001101, 0, 2) == 0B001101); + CHECK(Util::bitswap(0B001101, 0, 3) == 
0B001101); + CHECK(Util::bitswap(0B001101, 0, 4) == 0B011100); + } + + SECTION("fillOnes") { + CHECK(Util::fillOnes(4) == 0B1111); + CHECK(Util::fillOnes(6) == 0B111111); + CHECK(Util::fillOnes(17) == 0B1'1111'1111'1111'1111); + CHECK(Util::fillOnes(54) == + 0x3F'FFFF'FFFF'FFFF); // 54 == 4*13 + 2 + } } TEST_CASE("Utility array and tuples", "[Util]") { diff --git a/pennylane_lightning/src/util/BitUtil.hpp b/pennylane_lightning/src/util/BitUtil.hpp index 8b7251ddc3..d6996a77d8 100644 --- a/pennylane_lightning/src/util/BitUtil.hpp +++ b/pennylane_lightning/src/util/BitUtil.hpp @@ -19,6 +19,7 @@ #include #include #include +#include #if defined(_MSC_VER) #include // for __lzcnt64 and __popcount @@ -171,6 +172,18 @@ inline auto log2PerfectPower(unsigned long val) -> size_t { #endif ///@} +constexpr auto constLog2PerfectPower(size_t value) -> size_t { + if (value == 0) { + return 0; // not well defined + } + size_t n = 0; + while ((value & 1U) == 0U) { + value >>= 1U; + ++n; + } + return n; +} + /** * @brief Check if there is a positive integer n such that value == 2^n. 
* @@ -196,8 +209,18 @@ inline auto constexpr fillLeadingOnes(size_t pos) -> size_t { /** * @brief Swap bits in i-th and j-th position in place */ -inline void constexpr bitswap(size_t bits, const size_t i, const size_t j) { +inline auto constexpr bitswap(size_t bits, const size_t i, const size_t j) + -> size_t { size_t x = ((bits >> i) ^ (bits >> j)) & 1U; - bits ^= ((x << i) | (x << j)); + return bits ^ ((x << i) | (x << j)); +} + +template +inline auto constexpr fillOnes(size_t nbits) -> IntegerType { + static_assert(std::is_integral_v && + std::is_unsigned_v); + + return static_cast(~IntegerType(0)) >> + static_cast(CHAR_BIT * sizeof(IntegerType) - nbits); } } // namespace Pennylane::Util diff --git a/pennylane_lightning/src/util/LinearAlgebra.hpp b/pennylane_lightning/src/util/LinearAlgebra.hpp index 40ea4292a8..bdf0e35f99 100644 --- a/pennylane_lightning/src/util/LinearAlgebra.hpp +++ b/pennylane_lightning/src/util/LinearAlgebra.hpp @@ -210,9 +210,9 @@ inline auto innerProdC(const std::complex *v1, const std::complex *v2, * @see innerProd(const std::complex *v1, const std::complex *v2, * const size_t data_size) */ -template -inline auto innerProd(const std::vector> &v1, - const std::vector> &v2) +template +inline auto innerProd(const std::vector, AllocA> &v1, + const std::vector, AllocB> &v2) -> std::complex { return innerProd(v1.data(), v2.data(), v1.size()); } @@ -224,9 +224,9 @@ inline auto innerProd(const std::vector> &v1, * @see innerProdC(const std::complex *v1, const std::complex *v2, * const size_t data_size) */ -template -inline auto innerProdC(const std::vector> &v1, - const std::vector> &v2) +template +inline auto innerProdC(const std::vector, AllocA> &v1, + const std::vector, AllocB> &v2) -> std::complex { return innerProdC(v1.data(), v2.data(), v1.size()); } @@ -484,15 +484,15 @@ inline auto Transpose(const std::vector> &mat, size_t m, * @param n Number of columns of `mat`. * @return mat transpose of shape n * m. 
*/ -template -inline auto Transpose(const std::vector &mat, size_t m, size_t n) - -> std::vector { +template +inline auto Transpose(const std::vector &mat, size_t m, size_t n) + -> std::vector { if (mat.size() != m * n) { throw std::invalid_argument( "Invalid number of rows and columns for the input matrix"); } - std::vector mat_t(n * m); + std::vector mat_t(n * m); CFTranspose(mat.data(), mat_t.data(), m, n, 0, m, 0, n); return mat_t; } @@ -548,9 +548,10 @@ inline void vecMatrixProd(const T *v_in, const T *mat, T *v_out, size_t m, * @see inline void vecMatrixProd(const T *v_in, * const T *mat, T *v_out, size_t m, size_t n) */ -template -inline auto vecMatrixProd(const std::vector &v_in, const std::vector &mat, - size_t m, size_t n) -> std::vector { +template +inline auto vecMatrixProd(const std::vector &v_in, + const std::vector &mat, size_t m, size_t n) + -> std::vector { if (v_in.size() != m) { throw std::invalid_argument("Invalid size for the input vector"); } @@ -559,7 +560,7 @@ inline auto vecMatrixProd(const std::vector &v_in, const std::vector &mat, "Invalid number of rows and columns for the input matrix"); } - std::vector v_out(n); + std::vector v_out(n); vecMatrixProd(v_in.data(), mat.data(), v_out.data(), m, n); return v_out; diff --git a/pennylane_lightning/src/util/Macros.hpp b/pennylane_lightning/src/util/Macros.hpp index 1b60d1e076..eeba364ba8 100644 --- a/pennylane_lightning/src/util/Macros.hpp +++ b/pennylane_lightning/src/util/Macros.hpp @@ -19,6 +19,74 @@ #if defined(__GNUC__) || defined(__clang__) #define PL_UNREACHABLE __builtin_unreachable() -#else +#elif defined(_MSC_VER) #define PL_UNREACHABLE __assume(false) +#else // Unsupported compiler +#define PL_UNREACHABLE +#endif + +#if defined(__AVX2__) +#define PL_USE_AVX2 1 +[[maybe_unused]] static constexpr bool use_avx2 = true; +#else +[[maybe_unused]] static constexpr bool use_avx2 = false; +#endif + +#if defined(__AVX512F__) +#define PL_USE_AVX512F 1 +[[maybe_unused]] static constexpr bool 
use_avx512f = true; +#else +[[maybe_unused]] static constexpr bool use_avx512f = false; +#endif + +#if defined(__AVX512DQ__) +#define PL_USE_AVX512DQ 1 +[[maybe_unused]] static constexpr bool use_avx512dq = true; +#else +[[maybe_unused]] static constexpr bool use_avx512dq = false; +#endif + +#if defined(__AVX512VL__) +#define PL_USE_AVX512VL 1 +[[maybe_unused]] static constexpr bool use_avx512vl = true; +#else +[[maybe_unused]] static constexpr bool use_avx512vl = false; +#endif + +#if defined(_OPENMP) +#define PL_USE_OMP 1 +[[maybe_unused]] static constexpr bool use_openmp = true; +#else +[[maybe_unused]] static constexpr bool use_openmp = false; +#endif + +#if (_OPENMP >= 202011) +#define PL_UNROLL_LOOP __Pragma("omp unroll(8)") +#elif defined(__GNUC__) +#define PL_UNROLL_LOOP _Pragma("GCC unroll 8") +#elif defined(__clang__) +#define PL_UNROLL_LOOP _Pragma("unroll(8)") +#else +#define PL_UNROLL_LOOP +#endif + +// Define force inline +#if defined(__GNUC__) || defined(__clang__) +#if NDEBUG +#define PL_FORCE_INLINE __attribute__((always_inline)) inline +#else +#define PL_FORCE_INLINE +#endif +#elif defined(_MSC_VER) +#if NDEBUG +#define PL_FORCE_INLINE __forceinline +#else +#define PL_FORCE_INLINE +#endif +#else +#if NDEBUG +#define PL_FORCE_INLINE inline +#else +#define PL_FORCE_INLINE +#endif #endif diff --git a/pennylane_lightning/src/util/Memory.hpp b/pennylane_lightning/src/util/Memory.hpp new file mode 100644 index 0000000000..dcd8dd359d --- /dev/null +++ b/pennylane_lightning/src/util/Memory.hpp @@ -0,0 +1,106 @@ +// Copyright 2022 Xanadu Quantum Technologies Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include +#include +#include +#include + +#include "TypeList.hpp" + +namespace Pennylane { + +constexpr auto constIsPerfectPowerOf2(size_t value) -> bool { + while ((value & 1U) == 0) { + value >>= 1U; + } + return value == 1; +} + +template struct AlignedAllocator { + static_assert(constIsPerfectPowerOf2(alignment), + "Template parameter alignment must be power of 2."); + using value_type = T; + + AlignedAllocator() = default; + + template struct rebind { + using other = AlignedAllocator; + }; + + template + explicit constexpr AlignedAllocator( + [[maybe_unused]] const AlignedAllocator &rhs) noexcept {} + + [[nodiscard]] T *allocate(std::size_t size) { + if (size == 0) { + return nullptr; + } + void *p = std::aligned_alloc( + alignment, + sizeof(T) * size); // NOLINT(cppcoreguidelines-owning-memory) + if (p == nullptr) { + throw std::bad_alloc(); + } + return static_cast(p); + } + + void deallocate(T *p, [[maybe_unused]] std::size_t size) noexcept { + std::free( + p); // NOLINT(hicpp-no-malloc, cppcoreguidelines-owning-memory) + } + + template void construct(U *ptr) { ::new ((void *)ptr) U(); } + + template void destroy(U *ptr) { + (void)ptr; + ptr->~U(); + } +}; + +template +bool operator==([[maybe_unused]] const AlignedAllocator &lhs, + [[maybe_unused]] const AlignedAllocator &rhs) { + return true; +} + +template +bool operator!=([[maybe_unused]] const AlignedAllocator &lhs, + [[maybe_unused]] const AlignedAllocator &rhs) { + return false; +} + +/** + * @brief This function calculate the common multiplier of alignments of all 
+ * kernels. + * + * As all alignment must be a multiple of 2, we just can choose the maximum + * alignment. + */ +template struct commonAlignmentHelper { + constexpr static uint32_t value = + std::max(TypeList::Type::packed_bytes, + commonAlignmentHelper::value); +}; +template <> struct commonAlignmentHelper { + constexpr static uint32_t value = 4U; +}; + +template +[[maybe_unused]] constexpr static size_t common_alignment = + commonAlignmentHelper::value; + +template +using PLAllocator = std::conditional_t, + AlignedAllocator>; +} // namespace Pennylane diff --git a/pennylane_lightning/src/util/TypeList.hpp b/pennylane_lightning/src/util/TypeList.hpp index e288bd80a5..97db820da7 100644 --- a/pennylane_lightning/src/util/TypeList.hpp +++ b/pennylane_lightning/src/util/TypeList.hpp @@ -18,14 +18,19 @@ #pragma once #include +#include #include +#include namespace Pennylane::Util { template struct TypeNode { using Type = T; using Next = TypeNode; }; - +template struct TypeNode { + using Type = T; + using Next = void; +}; template struct TypeNode { using Type = T; using Next = void; @@ -36,16 +41,22 @@ template struct TypeNode { */ template using TypeList = TypeNode; -template struct getNthType { - static_assert(!std::is_same_v, - "The given n is larger than the length of the typelist."); - using Type = getNthType; +template struct getNth { + using Type = typename getNth::Type; }; -template struct getNthType { +template struct getNth { + static_assert(!std::is_same_v, + "The given n is larger than the length of the type list."); using Type = typename TypeList::Type; }; +/** + * @brief Alias + */ +template +using getNthType = typename getNth::Type; + template constexpr size_t length() { if constexpr (std::is_same_v) { return 0; @@ -53,4 +64,15 @@ template constexpr size_t length() { return 1 + length(); } } + +template struct PrependToTypeList; + +template +struct PrependToTypeList> { + using Type = TypeNode; +}; +template struct PrependToTypeList { + using Type = 
TypeNode; +}; + } // namespace Pennylane::Util diff --git a/pennylane_lightning/src/util/Util.hpp b/pennylane_lightning/src/util/Util.hpp index 3b184b82f9..ca029c0609 100644 --- a/pennylane_lightning/src/util/Util.hpp +++ b/pennylane_lightning/src/util/Util.hpp @@ -404,4 +404,39 @@ auto chunkData(const Container &data, std::size_t num_chunks) // type alias template using remove_cvref_t = typename remove_cvref::type; +/** + * @brief Iterate over all enum values (if BEGIN and END are defined). + * + * @tparam T enum type + * @tparam Func function to execute + */ +template void for_each_enum(Func &&func) { + for (auto e = T::BEGIN; e != T::END; + e = static_cast(std::underlying_type_t(e) + 1)) { + func(e); + } +} +template void for_each_enum(Func &&func) { + for (auto e1 = T::BEGIN; e1 != T::END; + e1 = static_cast(std::underlying_type_t(e1) + 1)) { + for (auto e2 = U::BEGIN; e2 != U::END; + e2 = static_cast(std::underlying_type_t(e2) + 1)) { + func(e1, e2); + } + } +} + +template struct common_alignment { + constexpr static size_t value = + std::max(TypeList::Type::template required_alignment, + common_alignment::value); +}; +template struct common_alignment { + constexpr static size_t value = std::alignment_of_v; +}; + +template +[[maybe_unused]] constexpr static size_t common_alignment_v = + common_alignment::value; + } // namespace Pennylane::Util From 9398a0ab728bfd849c5f19b46b5cc4713b55a851 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Sun, 27 Feb 2022 00:34:13 -0500 Subject: [PATCH 02/94] StateVector architecture refatored; pass tests --- pennylane_lightning/lightning_qubit.py | 5 + .../src/algorithms/AdjointDiff.hpp | 49 ++++--- pennylane_lightning/src/bindings/Bindings.hpp | 7 + .../DefaultKernelsForStateVector.hpp | 15 +- .../src/simulator/Measures.hpp | 16 +-- .../src/simulator/StateVectorCPU.hpp | 110 ++------------ .../src/simulator/StateVectorManagedCPU.cpp | 19 +++ .../src/simulator/StateVectorManagedCPU.hpp | 134 ++++++++++++++++++ 
...ateVectorRaw.cpp => StateVectorRawCPU.cpp} | 6 +- ...ateVectorRaw.hpp => StateVectorRawCPU.hpp} | 25 ++-- pennylane_lightning/src/tests/.clang-tidy | 2 +- pennylane_lightning/src/tests/CMakeLists.txt | 2 - .../src/tests/CreateAllWires.cpp | 2 +- .../src/tests/Test_AdjDiff.cpp | 35 +++-- ...est_GateImplementations_CompareKernels.cpp | 2 - .../Test_GateImplementations_Nonparam.cpp | 11 +- .../tests/Test_GateImplementations_Param.cpp | 12 +- .../src/tests/Test_Measures.cpp | 24 ++-- .../src/tests/Test_StateVectorBase.cpp | 0 .../src/tests/Test_StateVectorCPU.cpp | 76 +++++++--- .../src/tests/Test_StateVectorRaw.cpp | 47 ------ pennylane_lightning/src/tests/Test_Util.cpp | 6 +- .../src/tests/Test_VectorJacobianProduct.cpp | 50 +++---- pennylane_lightning/src/util/Memory.hpp | 8 +- 24 files changed, 370 insertions(+), 293 deletions(-) create mode 100644 pennylane_lightning/src/simulator/StateVectorManagedCPU.cpp create mode 100644 pennylane_lightning/src/simulator/StateVectorManagedCPU.hpp rename pennylane_lightning/src/simulator/{StateVectorRaw.cpp => StateVectorRawCPU.cpp} (82%) rename pennylane_lightning/src/simulator/{StateVectorRaw.hpp => StateVectorRawCPU.hpp} (82%) delete mode 100644 pennylane_lightning/src/tests/Test_StateVectorBase.cpp delete mode 100644 pennylane_lightning/src/tests/Test_StateVectorRaw.cpp diff --git a/pennylane_lightning/lightning_qubit.py b/pennylane_lightning/lightning_qubit.py index 4fe1851a69..f6dd12a3fe 100644 --- a/pennylane_lightning/lightning_qubit.py +++ b/pennylane_lightning/lightning_qubit.py @@ -116,6 +116,11 @@ def __init__(self, wires, *, kernel_for_ops=None, shots=None, batch_obs=False): super().__init__(wires, shots=shots) self._batch_obs = batch_obs + # Lightning keeps a simulator memory of which is managed by C++ + # Note that as C++ manages the data, we need to copy from this array when + # the result is used outside of the module + self.sim_ = None + @classmethod def capabilities(cls): capabilities = 
super().capabilities().copy() diff --git a/pennylane_lightning/src/algorithms/AdjointDiff.hpp b/pennylane_lightning/src/algorithms/AdjointDiff.hpp index 9b69139260..c717681b40 100644 --- a/pennylane_lightning/src/algorithms/AdjointDiff.hpp +++ b/pennylane_lightning/src/algorithms/AdjointDiff.hpp @@ -27,7 +27,7 @@ #include "Error.hpp" #include "JacobianTape.hpp" #include "LinearAlgebra.hpp" -#include "StateVectorCPU.hpp" +#include "StateVectorManagedCPU.hpp" #include @@ -49,7 +49,7 @@ namespace Pennylane::Algorithms { */ template class AdjointJacobian { private: - using GeneratorFunc = void (*)(StateVectorCPU &, + using GeneratorFunc = void (*)(StateVectorManagedCPU &, const std::vector &, const bool); // function pointer type @@ -64,8 +64,8 @@ template class AdjointJacobian { * @param obs_index Observable index position of Jacobian to update. * @param param_index Parameter index position of Jacobian to update. */ - inline void updateJacobian(const StateVectorCPU &sv1, - const StateVectorCPU &sv2, + inline void updateJacobian(const StateVectorManagedCPU &sv1, + const StateVectorManagedCPU &sv2, std::vector> &jac, T scaling_coeff, size_t obs_index, size_t param_index) { @@ -77,13 +77,13 @@ template class AdjointJacobian { /** * @brief Utility method to apply all operations from given `%OpsData` - * object to `%StateVectorCPU` + * object to `%StateVectorManagedCPU` * * @param state Statevector to be updated. * @param operations Operations to apply. * @param adj Take the adjoint of the given operations. */ - inline void applyOperations(StateVectorCPU &state, + inline void applyOperations(StateVectorManagedCPU &state, const OpsData &operations, bool adj = false) { for (size_t op_idx = 0; op_idx < operations.getOpsName().size(); @@ -96,13 +96,13 @@ template class AdjointJacobian { } /** * @brief Utility method to apply the adjoint indexed operation from - * `%OpsData` object to `%StateVectorCPU`. + * `%OpsData` object to `%StateVectorManagedCPU`. 
* * @param state Statevector to be updated. * @param operations Operations to apply. * @param op_idx Adjointed operation index to apply. */ - inline void applyOperationAdj(StateVectorCPU &state, + inline void applyOperationAdj(StateVectorManagedCPU &state, const OpsData &operations, size_t op_idx) { state.applyOperation(operations.getOpsName()[op_idx], operations.getOpsWires()[op_idx], @@ -112,12 +112,12 @@ template class AdjointJacobian { /** * @brief Utility method to apply a given operations from given - * `%ObsDatum` object to `%StateVectorCPU` + * `%ObsDatum` object to `%StateVectorManagedCPU` * * @param state Statevector to be updated. * @param observable Observable to apply. */ - inline void applyObservable(StateVectorCPU &state, + inline void applyObservable(StateVectorManagedCPU &state, const ObsDatum &observable) { using namespace Pennylane::Util; for (size_t j = 0; j < observable.getSize(); j++) { @@ -159,9 +159,10 @@ template class AdjointJacobian { * @param reference_state Reference statevector * @param observables Vector of observables to apply to each statevector. */ - inline void applyObservables(std::vector> &states, - const StateVectorCPU &reference_state, - const std::vector> &observables) { + inline void + applyObservables(std::vector> &states, + const StateVectorManagedCPU &reference_state, + const std::vector> &observables) { // clang-format off // Globally scoped exception value to be captured within OpenMP block. // See the following for OpenMP design decisions: @@ -209,9 +210,9 @@ template class AdjointJacobian { * @param op_idx Index of given operation within operations list to take * adjoint of. */ - inline void applyOperationsAdj(std::vector> &states, - const OpsData &operations, - size_t op_idx) { + inline void + applyOperationsAdj(std::vector> &states, + const OpsData &operations, size_t op_idx) { // clang-format off // Globally scoped exception value to be captured within OpenMP block. 
// See the following for OpenMP design decisions: @@ -300,7 +301,7 @@ template class AdjointJacobian { * of parametric gates. * * For the statevector data associated with `psi` of length `num_elements`, - * we make internal copies to a `%StateVectorCPU` object, with one + * we make internal copies to a `%StateVectorManagedCPU` object, with one * per required observable. The `operations` will be applied to the internal * statevector copies, with the operation indices participating in the * gradient calculations given in `trainableParams`, and the overall number @@ -335,7 +336,8 @@ template class AdjointJacobian { num_param_ops - 1; // total number of parametric ops // Create $U_{1:p}\vert \lambda \rangle$ - StateVectorCPU lambda(jd.getPtrStateVec(), jd.getSizeStateVec()); + StateVectorManagedCPU lambda(jd.getPtrStateVec(), + jd.getSizeStateVec()); // Apply given operations to statevector if requested if (apply_operations) { @@ -345,14 +347,15 @@ template class AdjointJacobian { const auto tp_begin = tp.begin(); auto tp_it = tp.end(); - StateVectorCPU sv{lambda.getNumQubits(), Threading::SingleThread}; + StateVectorManagedCPU sv{lambda.getNumQubits(), + Threading::SingleThread}; // Create observable-applied state-vectors - std::vector> H_lambda( - num_observables, - StateVectorCPU{lambda.getNumQubits(), Threading::SingleThread}); + std::vector> H_lambda( + num_observables, StateVectorManagedCPU{lambda.getNumQubits(), + Threading::SingleThread}); applyObservables(H_lambda, lambda, obs); - StateVectorCPU mu(lambda.getNumQubits()); + StateVectorManagedCPU mu(lambda.getNumQubits()); for (int op_idx = static_cast(ops_name.size() - 1); op_idx >= 0; op_idx--) { diff --git a/pennylane_lightning/src/bindings/Bindings.hpp b/pennylane_lightning/src/bindings/Bindings.hpp index 5d79774ffd..a1845ba67d 100644 --- a/pennylane_lightning/src/bindings/Bindings.hpp +++ b/pennylane_lightning/src/bindings/Bindings.hpp @@ -63,6 +63,13 @@ static auto create(pybind11::array_t> &numpyArray) 
{data_ptr, static_cast(numpyArrayInfo.shape[0])}); } +template +static auto toNumpyArray(const StateVectorCPU &sv) + -> py::array_t> { + return py::array_t>( + {sv.getLength()}, {sizeof(PrecisionT)} sv.getData(), ); +} + /** * @brief Apply given list of operations to Numpy data array using C++ * `%StateVector` class. diff --git a/pennylane_lightning/src/simulator/DefaultKernelsForStateVector.hpp b/pennylane_lightning/src/simulator/DefaultKernelsForStateVector.hpp index 72613bc386..3259ad5861 100644 --- a/pennylane_lightning/src/simulator/DefaultKernelsForStateVector.hpp +++ b/pennylane_lightning/src/simulator/DefaultKernelsForStateVector.hpp @@ -123,15 +123,18 @@ class DefaultKernelsForStateVector { all_memory_model, all_qubit_numbers, Gates::KernelType::LM); - instance.assignKernelForGate(GateOperation::IsingXX, all_threading, - all_memory_model, less_than(12), - Gates::KernelType::LM); + instance.assignKernelForGate( + GateOperation::IsingXX, all_threading, + // NOLINTNEXTLINE(readability-magic-numbers) + all_memory_model, less_than(12), Gates::KernelType::LM); instance.assignKernelForGate( GateOperation::IsingXX, all_threading, all_memory_model, + // NOLINTNEXTLINE(readability-magic-numbers) in_between_closed(12, 20), Gates::KernelType::PI); - instance.assignKernelForGate(GateOperation::IsingXX, all_threading, - all_memory_model, larger_than(20), - Gates::KernelType::LM); + instance.assignKernelForGate( + GateOperation::IsingXX, all_threading, + // NOLINTNEXTLINE(readability-magic-numbers) + all_memory_model, larger_than(20), Gates::KernelType::LM); instance.assignKernelForGate(GateOperation::IsingYY, all_threading, all_memory_model, all_qubit_numbers, diff --git a/pennylane_lightning/src/simulator/Measures.hpp b/pennylane_lightning/src/simulator/Measures.hpp index f2f1cc010a..d03031ee36 100644 --- a/pennylane_lightning/src/simulator/Measures.hpp +++ b/pennylane_lightning/src/simulator/Measures.hpp @@ -26,8 +26,8 @@ #include #include "LinearAlgebra.hpp" 
-#include "StateVectorCPU.hpp" -#include "StateVectorRaw.hpp" +#include "StateVectorManagedCPU.hpp" +#include "StateVectorRawCPU.hpp" namespace Pennylane { /** @@ -39,14 +39,14 @@ namespace Pennylane { * * @tparam fp_t Floating point precision of underlying measurements. */ -template > +template > class Measures { private: const SVType &original_statevector; using CFP_t = std::complex; public: - Measures(const SVType &provided_statevector) + explicit Measures(const SVType &provided_statevector) : original_statevector{provided_statevector} {}; /** @@ -123,7 +123,7 @@ class Measures { const std::vector &wires) { // Copying the original state vector, for the application of the // observable operator. - StateVectorCPU operator_statevector(original_statevector); + StateVectorManagedCPU operator_statevector(original_statevector); operator_statevector.applyMatrix(matrix, wires); @@ -143,7 +143,7 @@ class Measures { const std::vector &wires) { // Copying the original state vector, for the application of the // observable operator. - StateVectorCPU operator_statevector(original_statevector); + StateVectorManagedCPU operator_statevector(original_statevector); operator_statevector.applyOperation(operation, wires); @@ -190,7 +190,7 @@ class Measures { fp_t var(const std::string &operation, const std::vector &wires) { // Copying the original state vector, for the application of the // observable operator. - StateVectorCPU operator_statevector(original_statevector); + StateVectorManagedCPU operator_statevector(original_statevector); operator_statevector.applyOperation(operation, wires); @@ -216,7 +216,7 @@ class Measures { const std::vector &wires) { // Copying the original state vector, for the application of the // observable operator. 
- StateVectorCPU operator_statevector(original_statevector); + StateVectorManagedCPU operator_statevector(original_statevector); operator_statevector.applyMatrix(matrix, wires); diff --git a/pennylane_lightning/src/simulator/StateVectorCPU.hpp b/pennylane_lightning/src/simulator/StateVectorCPU.hpp index a003adf1ba..89ff8d40a4 100644 --- a/pennylane_lightning/src/simulator/StateVectorCPU.hpp +++ b/pennylane_lightning/src/simulator/StateVectorCPU.hpp @@ -22,22 +22,18 @@ namespace Pennylane { /** - * @brief StateVector class where data resides in CPU memory. Memory ownership - * resides within class. + * @brief StateVector class where data resides in CPU memory. * - * We currently use std::unique_ptr to C-style array as we want to choose - * allocator in runtime. This is impossible with std::vector. - * - * @tparam PrecisionT + * @tparam PrecisionT Data floating point type + * @tparam Derived Derived class for CRTP. */ -template -class StateVectorCPU - : public StateVectorBase> { +template +class StateVectorCPU : public StateVectorBase { public: using ComplexPrecisionT = std::complex; private: - using BaseType = StateVectorBase; + using BaseType = StateVectorBase; Threading threading_; CPUMemoryModel memory_model_; @@ -46,8 +42,6 @@ class StateVectorCPU kernel_for_gates_; std::unordered_map kernel_for_generators_; - std::unique_ptr - data_; // NOLINT(modernize-avoid-c-arrays) void setKernels(size_t num_qubits, Threading threading, CPUMemoryModel memory_model) { @@ -58,90 +52,15 @@ class StateVectorCPU num_qubits, threading, memory_model); } - public: - explicit StateVectorCPU(size_t num_qubits, - Threading threading = bestThreading(), - CPUMemoryModel memory_model = bestCPUMemoryModel()) + protected: + explicit StateVectorCPU(size_t num_qubits, Threading threading, + CPUMemoryModel memory_model) : BaseType(num_qubits), threading_{threading}, memory_model_{ memory_model} { - setKernels(num_qubits, threading, memory_model); - - size_t length = BaseType::getLength(); - 
data_ = std::unique_ptr{new (std::align_val_t{ - 64}) ComplexPrecisionT[length]}; // NOLINT(modernize-avoid-c-arrays) - std::fill(data_.get(), data_.get() + length, - ComplexPrecisionT{0.0, 0.0}); - data_[0] = {1, 0}; - } - - template - explicit StateVectorCPU( - const StateVectorBase &other, - Threading threading = bestThreading(), - CPUMemoryModel memory_model = bestCPUMemoryModel()) - : BaseType(other.getNumQubits()), threading_{threading}, - memory_model_{memory_model} { - - size_t length = BaseType::getLength(); - data_ = std::unique_ptr{new (std::align_val_t{ - 64}) ComplexPrecisionT[length]}; // NOLINT(modernize-avoid-c-arrays) - - std::copy(other.getData(), other.getData() + length, data_.get()); - - setKernels(BaseType::getNumQubits(), threading, memory_model); - } - - StateVectorCPU(const ComplexPrecisionT *other_data, size_t other_size, - Threading threading = bestThreading(), - CPUMemoryModel memory_model = bestCPUMemoryModel()) - : BaseType(Util::log2PerfectPower(other_size)), threading_{threading}, - memory_model_{memory_model} { - PL_ABORT_IF_NOT(Util::isPerfectPowerOf2(other_size), - "The size of provided data must be a power of 2."); - data_ = std::unique_ptr{ - new (std::align_val_t{64}) ComplexPrecisionT - [other_size]}; // NOLINT(modernize-avoid-c-arrays) - setKernels(BaseType::getNumQubits(), threading, memory_model); - - updateData(other_data); - } - - template - explicit StateVectorCPU( - const std::vector, Alloc> &rhs, - Threading threading = bestThreading(), - CPUMemoryModel memory_model = bestCPUMemoryModel()) - : StateVectorCPU(rhs.data(), rhs.size(), threading, - memory_model) // NOLINT(hicpp-member-init) - // this is false positive for delegating - // constructor from clang-tidy - {} - - StateVectorCPU(const StateVectorCPU &rhs) - : BaseType(rhs.getNumQubits()), threading_{rhs.threading_}, - memory_model_{rhs.memory_model_} { - setKernels(BaseType::getNumQubits(), threading_, memory_model_); - - size_t length = BaseType::getLength(); - 
data_ = std::unique_ptr{new (std::align_val_t{ - 64}) ComplexPrecisionT[length]}; // NOLINT(modernize-avoid-c-arrays) - std::copy(rhs.getData(), rhs.getData() + length, data_.get()); - } - - StateVectorCPU(StateVectorCPU &&) noexcept = default; - - StateVectorCPU &operator=(const StateVectorCPU &) = delete; - StateVectorCPU &operator=(StateVectorCPU &&) noexcept = default; - - ~StateVectorCPU() = default; - - [[nodiscard]] auto getData() -> ComplexPrecisionT * { return data_.get(); } - - [[nodiscard]] auto getData() const -> const ComplexPrecisionT * { - return data_.get(); } + public: [[nodiscard]] inline auto getKernelForGate(Gates::GateOperation gate_op) const -> Gates::KernelType { return kernel_for_gates_.at(gate_op); @@ -152,15 +71,6 @@ class StateVectorCPU -> Gates::KernelType { return kernel_for_generators_.at(gntr_op); } - - /** - * @brief Update data of the class to new_data - * - * @param new_data std::vector contains data. - */ - void updateData(const ComplexPrecisionT *data) { - std::copy(data, data + BaseType::getLength(), data_.get()); - } }; } // namespace Pennylane diff --git a/pennylane_lightning/src/simulator/StateVectorManagedCPU.cpp b/pennylane_lightning/src/simulator/StateVectorManagedCPU.cpp new file mode 100644 index 0000000000..90a13bf549 --- /dev/null +++ b/pennylane_lightning/src/simulator/StateVectorManagedCPU.cpp @@ -0,0 +1,19 @@ +// Copyright 2021 Xanadu Quantum Technologies Inc. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "StateVectorManagedCPU.hpp" + +// explicit instantiation +template class Pennylane::StateVectorManagedCPU; +template class Pennylane::StateVectorManagedCPU; diff --git a/pennylane_lightning/src/simulator/StateVectorManagedCPU.hpp b/pennylane_lightning/src/simulator/StateVectorManagedCPU.hpp new file mode 100644 index 0000000000..b36aac7f6d --- /dev/null +++ b/pennylane_lightning/src/simulator/StateVectorManagedCPU.hpp @@ -0,0 +1,134 @@ +// Copyright 2021 Xanadu Quantum Technologies Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include "BitUtil.hpp" +#include "DispatchKeys.hpp" +#include "Gates.hpp" +#include "KernelType.hpp" +#include "Memory.hpp" +#include "StateVectorBase.hpp" +#include "StateVectorCPU.hpp" +#include "Util.hpp" + +namespace Pennylane { + +/** + * @brief StateVector class where data resides in CPU memory. Memory ownership + * resides within class. + * + * We currently use std::unique_ptr to C-style array as we want to choose + * allocator in runtime. This is impossible with std::vector. 
+ * + * @tparam PrecisionT + */ +template +class StateVectorManagedCPU + : public StateVectorCPU> { + public: + using ComplexPrecisionT = std::complex; + + private: + using BaseType = StateVectorCPU; + + // NOLINTNEXTLINE(modernize-avoid-c-arrays,hicpp-avoid-c-arrays) + std::unique_ptr data_; + + public: + explicit StateVectorManagedCPU( + size_t num_qubits, Threading threading = bestThreading(), + CPUMemoryModel memory_model = bestCPUMemoryModel()) + : BaseType{num_qubits, threading, memory_model} { + + size_t length = BaseType::getLength(); + // NOLINTNEXTLINE(modernize-avoid-c-arrays,hicpp-avoid-c-arrays) + data_ = std::unique_ptr{ + new (std::align_val_t{64}) ComplexPrecisionT[length]}; + std::fill(data_.get(), data_.get() + length, + ComplexPrecisionT{0.0, 0.0}); + data_[0] = {1, 0}; + } + + template + explicit StateVectorManagedCPU( + const StateVectorBase &other, + Threading threading = bestThreading(), + CPUMemoryModel memory_model = bestCPUMemoryModel()) + : BaseType(other.getNumQubits(), threading, memory_model) { + + size_t length = BaseType::getLength(); + // NOLINTNEXTLINE(modernize-avoid-c-arrays,hicpp-avoid-c-arrays) + data_ = std::unique_ptr{ + new (std::align_val_t{64}) ComplexPrecisionT[length]}; + + std::copy(other.getData(), other.getData() + length, data_.get()); + + setKernels(BaseType::getNumQubits(), threading, memory_model); + } + + StateVectorManagedCPU(const ComplexPrecisionT *other_data, + size_t other_size, + Threading threading = bestThreading(), + CPUMemoryModel memory_model = bestCPUMemoryModel()) + : BaseType(Util::log2PerfectPower(other_size), threading, + memory_model) { + PL_ABORT_IF_NOT(Util::isPerfectPowerOf2(other_size), + "The size of provided data must be a power of 2."); + + // NOLINTNEXTLINE(modernize-avoid-c-arrays,hicpp-avoid-c-arrays) + data_ = std::unique_ptr{ + new (std::align_val_t{64}) ComplexPrecisionT[other_size]}; + updateData(other_data); + } + + // Clang-tidy gives false positive for delegating constructor + 
template + // NOLINTNEXTLINE(hicpp-member-init) + explicit StateVectorManagedCPU( + const std::vector, Alloc> &rhs, + Threading threading = bestThreading(), + CPUMemoryModel memory_model = bestCPUMemoryModel()) + : StateVectorManagedCPU(rhs.data(), rhs.size(), threading, + memory_model) {} + + StateVectorManagedCPU(const StateVectorManagedCPU &rhs) : BaseType(rhs) { + size_t length = BaseType::getLength(); + // NOLINTNEXTLINE(modernize-avoid-c-arrays,hicpp-avoid-c-arrays) + data_ = std::unique_ptr{ + new (std::align_val_t{64}) ComplexPrecisionT[length]}; + std::copy(rhs.getData(), rhs.getData() + length, data_.get()); + } + + StateVectorManagedCPU(StateVectorManagedCPU &&) noexcept = default; + + StateVectorManagedCPU &operator=(const StateVectorManagedCPU &) = delete; + StateVectorManagedCPU & + operator=(StateVectorManagedCPU &&) noexcept = default; + + ~StateVectorManagedCPU() = default; + + [[nodiscard]] auto getData() -> ComplexPrecisionT * { return data_.get(); } + + [[nodiscard]] auto getData() const -> const ComplexPrecisionT * { + return data_.get(); + } + + /** + * @brief Copy the state vector amplitudes from data into this object. + * + * @param data Pointer to at least getLength() elements to copy from. + */ + void updateData(const ComplexPrecisionT *data) { + std::copy(data, data + BaseType::getLength(), data_.get()); + } +}; + +} // namespace Pennylane diff --git a/pennylane_lightning/src/simulator/StateVectorRaw.cpp b/pennylane_lightning/src/simulator/StateVectorRawCPU.cpp similarity index 82% rename from pennylane_lightning/src/simulator/StateVectorRaw.cpp rename to pennylane_lightning/src/simulator/StateVectorRawCPU.cpp index 65e6664e09..7454f66a65 100644 --- a/pennylane_lightning/src/simulator/StateVectorRaw.cpp +++ b/pennylane_lightning/src/simulator/StateVectorRawCPU.cpp @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "StateVectorRaw.hpp" +#include "StateVectorRawCPU.hpp" // explicit instantiation -template class Pennylane::StateVectorRaw; -template class Pennylane::StateVectorRaw; +template class Pennylane::StateVectorRawCPU; +template class Pennylane::StateVectorRawCPU; diff --git a/pennylane_lightning/src/simulator/StateVectorRaw.hpp b/pennylane_lightning/src/simulator/StateVectorRawCPU.hpp similarity index 82% rename from pennylane_lightning/src/simulator/StateVectorRaw.hpp rename to pennylane_lightning/src/simulator/StateVectorRawCPU.hpp index f25b2c2151..57c0775774 100644 --- a/pennylane_lightning/src/simulator/StateVectorRaw.hpp +++ b/pennylane_lightning/src/simulator/StateVectorRawCPU.hpp @@ -24,7 +24,7 @@ #include "BitUtil.hpp" #include "Error.hpp" -#include "StateVectorBase.hpp" +#include "StateVectorCPU.hpp" #include @@ -44,10 +44,10 @@ namespace Pennylane { * @tparam PrecisionT Floating point precision of underlying statevector data. */ template -class StateVectorRaw - : public StateVectorBase> { +class StateVectorRawCPU + : public StateVectorCPU> { public: - using Base = StateVectorBase>; + using BaseType = StateVectorCPU>; using ComplexPrecisionT = std::complex; private: @@ -61,9 +61,10 @@ class StateVectorRaw * @param data Raw data pointer. * @param length The size of the data, i.e. 2^(number of qubits). 
*/ - StateVectorRaw(ComplexPrecisionT *data, size_t length) - : StateVectorBase>( - Util::log2PerfectPower(length)), + StateVectorRawCPU(ComplexPrecisionT *data, size_t length, + Threading threading = bestThreading()) + : BaseType{Util::log2PerfectPower(length), threading, + getMemoryModel(static_cast(data))}, data_{data}, length_(length) { // check length is perfect power of 2 if (!Util::isPerfectPowerOf2(length)) { @@ -74,14 +75,6 @@ class StateVectorRaw } } - StateVectorRaw(const StateVectorRaw &) = default; - StateVectorRaw(StateVectorRaw &&) noexcept = default; - - auto operator=(const StateVectorRaw &) -> StateVectorRaw & = default; - auto operator=(StateVectorRaw &&) noexcept -> StateVectorRaw & = default; - - ~StateVectorRaw() = default; - /** * @brief Get the underlying data pointer. * @@ -110,7 +103,7 @@ class StateVectorRaw " is given."); // TODO: change to std::format in C++20 } data_ = data; - Base::setNumQubits(Util::log2PerfectPower(length)); + BaseType::setNumQubits(Util::log2PerfectPower(length)); length_ = length; } diff --git a/pennylane_lightning/src/tests/.clang-tidy b/pennylane_lightning/src/tests/.clang-tidy index 0a70c347b0..3b5744a4b0 100644 --- a/pennylane_lightning/src/tests/.clang-tidy +++ b/pennylane_lightning/src/tests/.clang-tidy @@ -1,5 +1,5 @@ --- -Checks: 'clang-diagnostic-*,clang-analyzer-*,-*,-llvmlibc-*,modernize-*,-modernize-use-trailing-return-type,clang-analyzer-cplusplus*,openmp-*,performance-*,portability-*,readability-*,-readability-magic-numbers,hicpp-*,-hicpp-no-array-decay,bugprone-suspicious-*,llvm-namespace-comment,cppcoreguidelines-avoid-non-const-global-variables,cppcoreguidelines-slicing,cppcoreguidelines-special-member-functions' +Checks: 
'clang-diagnostic-*,clang-analyzer-*,-*,-llvmlibc-*,modernize-*,-modernize-use-trailing-return-type,clang-analyzer-cplusplus*,openmp-*,performance-*,portability-*,readability-*,-modernize-avoid-c-arrays,-readability-magic-numbers,hicpp-*,-hicpp-no-array-decay,-hicpp-avoid-c-arrays,bugprone-suspicious-*,llvm-namespace-comment,cppcoreguidelines-slicing,cppcoreguidelines-special-member-functions' WarningsAsErrors: '*' HeaderFilterRegex: '.*' AnalyzeTemporaryDtors: false diff --git a/pennylane_lightning/src/tests/CMakeLists.txt b/pennylane_lightning/src/tests/CMakeLists.txt index fbe9b621ea..64ebd3a39d 100644 --- a/pennylane_lightning/src/tests/CMakeLists.txt +++ b/pennylane_lightning/src/tests/CMakeLists.txt @@ -70,7 +70,6 @@ target_link_libraries(compile_time_tests lightning_gates lightning_utils) set(TEST_SOURCES CreateAllWires.cpp Test_AdjDiff.cpp -# Test_Bindings.cpp Test_DynamicDispatcher.cpp Test_DefaultKernelsForStateVector.cpp Test_GateImplementations_CompareKernels.cpp @@ -84,7 +83,6 @@ set(TEST_SOURCES CreateAllWires.cpp Test_Measures.cpp Test_OpToMemberFuncPtr.cpp Test_StateVectorCPU.cpp - Test_StateVectorRaw.cpp Test_Util.cpp Test_VectorJacobianProduct.cpp) diff --git a/pennylane_lightning/src/tests/CreateAllWires.cpp b/pennylane_lightning/src/tests/CreateAllWires.cpp index 43a7e80ce4..4738554b54 100644 --- a/pennylane_lightning/src/tests/CreateAllWires.cpp +++ b/pennylane_lightning/src/tests/CreateAllWires.cpp @@ -13,7 +13,7 @@ auto crateAllWires(size_t n_qubits, Gates::GateOperation gate_op, bool order) wires.reserve(Util::popcount(k)); for (size_t i = 0; i < n_qubits; i++) { - if (((k >> i) & 1) == 1) { + if (((k >> i) & 1U) == 1U) { wires.emplace_back(i); } } diff --git a/pennylane_lightning/src/tests/Test_AdjDiff.cpp b/pennylane_lightning/src/tests/Test_AdjDiff.cpp index 6a05a36018..696d66d41d 100644 --- a/pennylane_lightning/src/tests/Test_AdjDiff.cpp +++ b/pennylane_lightning/src/tests/Test_AdjDiff.cpp @@ -13,7 +13,7 @@ #include #include 
"AdjointDiff.hpp" -#include "StateVectorCPU.hpp" +#include "StateVectorRawCPU.hpp" #include "Util.hpp" #include "TestHelpers.hpp" @@ -50,12 +50,10 @@ TEST_CASE("AdjointJacobian::adjointJacobian Op=RX, Obs=Z", for (const auto &p : param) { auto ops = OpsData({"RX"}, {{p}}, {{0}}, {false}); - /* std::vector> cdata(0b1 << num_qubits); cdata[0] = std::complex{1, 0}; - */ - StateVectorCPU psi(num_qubits); + StateVectorRawCPU psi(cdata.data(), cdata.size()); std::vector tp{0}; std::vector> obs_ls{obs}; @@ -84,7 +82,10 @@ TEST_CASE("AdjointJacobian::adjointJacobian Op=RY, Obs=X", for (const auto &p : param) { auto ops = OpsData({"RY"}, {{p}}, {{0}}, {false}); - StateVectorCPU psi(num_qubits); + std::vector> cdata(0b1 << num_qubits); + cdata[0] = std::complex{1, 0}; + + StateVectorRawCPU psi(cdata.data(), cdata.size()); std::vector tp{0}; std::vector> obs_ls{obs}; @@ -108,7 +109,9 @@ TEST_CASE("AdjointJacobian::adjointJacobian Op=RX, Obs=[Z,Z]", const size_t num_obs = 2; std::vector jacobian(num_obs * num_params, 0); - StateVectorCPU psi(num_qubits); + std::vector> cdata(0b1 << num_qubits); + StateVectorRawCPU psi(cdata.data(), cdata.size()); + cdata[0] = std::complex{1, 0}; auto obs1 = ObsDatum({"PauliZ"}, {{}}, {{0}}); auto obs2 = ObsDatum({"PauliZ"}, {{}}, {{1}}); @@ -137,7 +140,9 @@ TEST_CASE("AdjointJacobian::adjointJacobian Op=[RX,RX,RX], Obs=[Z,Z,Z]", const size_t num_obs = 3; std::vector jacobian(num_obs * num_params, 0); - StateVectorCPU psi(num_qubits); + std::vector> cdata(0b1 << num_qubits); + StateVectorRawCPU psi(cdata.data(), cdata.size()); + cdata[0] = std::complex{1, 0}; auto obs1 = ObsDatum({"PauliZ"}, {{}}, {{0}}); auto obs2 = ObsDatum({"PauliZ"}, {{}}, {{1}}); @@ -174,7 +179,9 @@ TEST_CASE("AdjointJacobian::adjointJacobian Op=[RX,RX,RX], Obs=[Z,Z,Z], " std::vector jacobian(num_obs * num_params, 0); std::vector t_params{0, 2}; - StateVectorCPU psi(num_qubits); + std::vector> cdata(0b1 << num_qubits); + StateVectorRawCPU psi(cdata.data(), cdata.size()); + 
cdata[0] = std::complex{1, 0}; auto obs1 = ObsDatum({"PauliZ"}, {{}}, {{0}}); auto obs2 = ObsDatum({"PauliZ"}, {{}}, {{1}}); @@ -207,7 +214,9 @@ TEST_CASE("AdjointJacobian::adjointJacobian Op=[RX,RX,RX], Obs=[ZZZ]", const size_t num_obs = 1; std::vector jacobian(num_obs * num_params, 0); - StateVectorCPU psi(num_qubits); + std::vector> cdata(0b1 << num_qubits); + StateVectorRawCPU psi(cdata.data(), cdata.size()); + cdata[0] = std::complex{1, 0}; auto obs = ObsDatum({"PauliZ", "PauliZ", "PauliZ"}, {{}, {}, {}}, {{0}, {1}, {2}}); @@ -240,7 +249,9 @@ TEST_CASE("AdjointJacobian::adjointJacobian Op=Mixed, Obs=[XXX]", const size_t num_obs = 1; std::vector jacobian(num_obs * num_params, 0); - StateVectorCPU psi(num_qubits); + std::vector> cdata(0b1 << num_qubits); + StateVectorRawCPU psi(cdata.data(), cdata.size()); + cdata[0] = std::complex{1, 0}; auto obs = ObsDatum({"PauliX", "PauliX", "PauliX"}, {{}, {}, {}}, {{0}, {1}, {2}}); @@ -301,7 +312,7 @@ TEST_CASE("AdjointJacobian::adjointJacobian Decomposed Rot gate, non " std::vector> cdata{INVSQRT2(), -INVSQRT2()}; - StateVectorCPU psi(cdata.data(), cdata.size()); + StateVectorRawCPU psi(cdata.data(), cdata.size()); auto obs = ObsDatum({"PauliZ"}, {{}}, {{0}}); auto ops = OpsData( @@ -342,7 +353,7 @@ TEST_CASE("AdjointJacobian::adjointJacobian Mixed Ops, Obs and TParams", std::vector> cdata{ONE(), ZERO(), ZERO(), ZERO()}; - StateVectorCPU psi(cdata); + StateVectorRawCPU psi(cdata.data(), cdata.size()); auto obs = ObsDatum({"PauliX", "PauliZ"}, {{}, {}}, {{0}, {1}}); auto ops = OpsData( diff --git a/pennylane_lightning/src/tests/Test_GateImplementations_CompareKernels.cpp b/pennylane_lightning/src/tests/Test_GateImplementations_CompareKernels.cpp index 30d6894b08..aea72009e9 100644 --- a/pennylane_lightning/src/tests/Test_GateImplementations_CompareKernels.cpp +++ b/pennylane_lightning/src/tests/Test_GateImplementations_CompareKernels.cpp @@ -25,9 +25,7 @@ using namespace Pennylane; using namespace Pennylane::Gates; using 
namespace Pennylane::Util; -namespace { using std::vector; -} template std::string kernelsToString() { if constexpr (!std::is_same_v) { diff --git a/pennylane_lightning/src/tests/Test_GateImplementations_Nonparam.cpp b/pennylane_lightning/src/tests/Test_GateImplementations_Nonparam.cpp index 85772294ff..2dde03af2b 100644 --- a/pennylane_lightning/src/tests/Test_GateImplementations_Nonparam.cpp +++ b/pennylane_lightning/src/tests/Test_GateImplementations_Nonparam.cpp @@ -21,9 +21,7 @@ */ using namespace Pennylane; -namespace { using std::vector; -} /** * @brief Run test suit only when the gate is defined @@ -75,13 +73,14 @@ void testApplyPauliX() { << ", PauliX - " << PrecisionToName::value) { for (size_t index = 0; index < num_qubits; index++) { auto st = createZeroState(num_qubits); - CHECK(st[0] == Util::ONE()); GateImplementation::applyPauliX(st.data(), num_qubits, {index}, false); - CHECK(st[0] == Util::ZERO()); - CHECK(st[0b1 << (num_qubits - index - 1)] == - Util::ONE()); + + std::string expected_str("000"); + expected_str[index] = '1'; + REQUIRE(st == + PLApprox(createProductState(expected_str))); } } } diff --git a/pennylane_lightning/src/tests/Test_GateImplementations_Param.cpp b/pennylane_lightning/src/tests/Test_GateImplementations_Param.cpp index 8e594fe3d6..74e6f3a767 100644 --- a/pennylane_lightning/src/tests/Test_GateImplementations_Param.cpp +++ b/pennylane_lightning/src/tests/Test_GateImplementations_Param.cpp @@ -245,15 +245,15 @@ void testApplyRot() { std::vector{2.3, 0.1, 0.4}}; std::vector> expected_results{ - std::vector(0b1 << num_qubits), - std::vector(0b1 << num_qubits), - std::vector(0b1 << num_qubits)}; + std::vector(1U << num_qubits), + std::vector(1U << num_qubits), + std::vector(1U << num_qubits)}; for (size_t i = 0; i < angles.size(); i++) { const auto rot_mat = Gates::getRot(angles[i][0], angles[i][1], angles[i][2]); expected_results[i][0] = rot_mat[0]; - expected_results[i][0b1 << (num_qubits - i - 1)] = rot_mat[2]; + 
expected_results[i][1U << (num_qubits - i - 1)] = rot_mat[2]; } for (size_t index = 0; index < num_qubits; index++) { @@ -1233,8 +1233,8 @@ void testApplyCRot() { std::vector expected_results(8); const auto rot_mat = Gates::getRot(angles[0], angles[1], angles[2]); - expected_results[0b1 << (num_qubits - 1)] = rot_mat[0]; - expected_results[(0b1 << num_qubits) - 2] = rot_mat[2]; + expected_results[1U << (num_qubits - 1)] = rot_mat[0]; + expected_results[(1U << num_qubits) - 2] = rot_mat[2]; DYNAMIC_SECTION(GateImplementation::name << ", CRot0,1 |000> -> |000> - " diff --git a/pennylane_lightning/src/tests/Test_Measures.cpp b/pennylane_lightning/src/tests/Test_Measures.cpp index b7ec1e8fd5..6f18a458df 100644 --- a/pennylane_lightning/src/tests/Test_Measures.cpp +++ b/pennylane_lightning/src/tests/Test_Measures.cpp @@ -3,7 +3,7 @@ #include #include "Measures.hpp" -#include "StateVectorCPU.hpp" +#include "StateVectorManagedCPU.hpp" #include "Util.hpp" #include @@ -17,14 +17,14 @@ using std::string; using std::vector; }; // namespace -StateVectorCPU Initializing_StateVector() { +StateVectorManagedCPU Initializing_StateVector() { // Defining a StateVector in a non-trivial configuration: size_t num_qubits = 3; size_t data_size = std::pow(2, num_qubits); std::vector> arr(data_size, 0); arr[0] = 1; - StateVectorCPU Measured_StateVector(arr.data(), data_size); + StateVectorManagedCPU Measured_StateVector(arr.data(), data_size); std::vector wires; @@ -64,11 +64,13 @@ TEST_CASE("Probabilities", "[Measures]") { {1, 2}, {2, 1}, {0}, {1}, {2}}; // Defining the State Vector that will be measured. - StateVectorCPU Measured_StateVector = Initializing_StateVector(); + StateVectorManagedCPU Measured_StateVector = + Initializing_StateVector(); // Initializing the measures class. // It will attach to the StateVector, allowing measures to keep been taken. 
- Measures> Measurer(Measured_StateVector); + Measures> Measurer( + Measured_StateVector); vector probabilities; @@ -90,11 +92,13 @@ TEST_CASE("Probabilities", "[Measures]") { TEST_CASE("Expected Values", "[Measures]") { // Defining the State Vector that will be measured. - StateVectorCPU Measured_StateVector = Initializing_StateVector(); + StateVectorManagedCPU Measured_StateVector = + Initializing_StateVector(); // Initializing the measures class. // It will attach to the StateVector, allowing measures to keep been taken. - Measures> Measurer(Measured_StateVector); + Measures> Measurer( + Measured_StateVector); SECTION("Testing single operation defined by a matrix:") { vector> PauliX = {0, 1, 1, 0}; @@ -162,11 +166,13 @@ TEST_CASE("Expected Values", "[Measures]") { TEST_CASE("Variances", "[Measures]") { // Defining the State Vector that will be measured. - StateVectorCPU Measured_StateVector = Initializing_StateVector(); + StateVectorManagedCPU Measured_StateVector = + Initializing_StateVector(); // Initializing the measures class. // It will attach to the StateVector, allowing measures to keep been taken. 
- Measures> Measurer(Measured_StateVector); + Measures> Measurer( + Measured_StateVector); SECTION("Testing single operation defined by a matrix:") { vector> PauliX = {0, 1, 1, 0}; diff --git a/pennylane_lightning/src/tests/Test_StateVectorBase.cpp b/pennylane_lightning/src/tests/Test_StateVectorBase.cpp deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/pennylane_lightning/src/tests/Test_StateVectorCPU.cpp b/pennylane_lightning/src/tests/Test_StateVectorCPU.cpp index 17fd667c19..5b1e263de2 100644 --- a/pennylane_lightning/src/tests/Test_StateVectorCPU.cpp +++ b/pennylane_lightning/src/tests/Test_StateVectorCPU.cpp @@ -9,40 +9,80 @@ #include -#include "StateVectorCPU.hpp" -#include "StateVectorRaw.hpp" +#include "StateVectorManagedCPU.hpp" +#include "StateVectorRawCPU.hpp" #include "Util.hpp" #include "TestHelpers.hpp" using namespace Pennylane; -TEMPLATE_TEST_CASE("StateVectorCPU::StateVectorCPU", "[StateVectorRaw]", float, - double) { +TEMPLATE_TEST_CASE("StateVectorManagedCPU::StateVectorManagedCPU", + "[StateVectorRaw]", float, double) { using fp_t = TestType; - SECTION("StateVectorCPU") { - REQUIRE(!std::is_constructible_v>); + SECTION("StateVectorManagedCPU") { + REQUIRE(!std::is_constructible_v>); } - SECTION("StateVectorCPU") { - REQUIRE(!std::is_constructible_v>); + SECTION("StateVectorManagedCPU") { + REQUIRE(!std::is_constructible_v>); } - SECTION("StateVectorCPU {size_t}") { - REQUIRE(std::is_constructible_v, size_t>); + SECTION("StateVectorManagedCPU {size_t}") { + REQUIRE( + std::is_constructible_v, size_t>); const size_t num_qubits = 4; - StateVectorCPU sv(num_qubits); + StateVectorManagedCPU sv(num_qubits); REQUIRE(sv.getNumQubits() == 4); REQUIRE(sv.getLength() == 16); } - SECTION("StateVectorCPU {const StateVectorRaw&}") { - REQUIRE(std::is_constructible_v, - const StateVectorRaw &>); + SECTION("StateVectorManagedCPU {const " + "StateVectorRawCPU&}") { + REQUIRE(std::is_constructible_v, + const StateVectorRawCPU &>); } - 
SECTION("StateVectorCPU {const StateVectorCPU&}") { - REQUIRE(std::is_copy_constructible_v>); + SECTION("StateVectorManagedCPU {const " + "StateVectorManagedCPU&}") { + REQUIRE(std::is_copy_constructible_v>); } - SECTION("StateVectorCPU {StateVectorCPU&&}") { - REQUIRE(std::is_move_constructible_v>); + SECTION( + "StateVectorManagedCPU {StateVectorManagedCPU&&}") { + REQUIRE(std::is_move_constructible_v>); } } + +std::mt19937_64 re{1337}; + +TEMPLATE_TEST_CASE("StateVectorRawCPU::StateVectorRawCPU", + "[StateVectorRawCPU]", float, double) { + using fp_t = TestType; + + SECTION("StateVectorRawCPU {std::complex*, size_t}") { + const size_t num_qubits = 4; + auto st_data = createRandomState(re, num_qubits); + StateVectorRawCPU sv(st_data.data(), st_data.size()); + + REQUIRE(sv.getNumQubits() == 4); + REQUIRE(sv.getData() == st_data.data()); + REQUIRE(sv.getLength() == 16); + } + SECTION("StateVectorRawCPU {std::complex*, size_t}") { + std::vector> st_data(14, 0.0); + REQUIRE_THROWS(StateVectorRawCPU(st_data.data(), st_data.size())); + } +} + +TEMPLATE_TEST_CASE("StateVectorRawCPU::setData", "[StateVectorRawCPU]", float, + double) { + using fp_t = TestType; + + auto st_data = createRandomState(re, 4); + StateVectorRawCPU sv(st_data.data(), st_data.size()); + + auto st_data2 = createRandomState(re, 8); + sv.setData(st_data2.data(), st_data2.size()); + + REQUIRE(sv.getNumQubits() == 8); + REQUIRE(sv.getData() == st_data2.data()); + REQUIRE(sv.getLength() == (1U << 8U)); +} diff --git a/pennylane_lightning/src/tests/Test_StateVectorRaw.cpp b/pennylane_lightning/src/tests/Test_StateVectorRaw.cpp deleted file mode 100644 index 4700c74881..0000000000 --- a/pennylane_lightning/src/tests/Test_StateVectorRaw.cpp +++ /dev/null @@ -1,47 +0,0 @@ -#include -#include -#include - -#include "StateVectorRaw.hpp" -#include "TestHelpers.hpp" -#include "Util.hpp" - -#include - -using namespace Pennylane; - -std::mt19937_64 re{1337}; - -TEMPLATE_TEST_CASE("StateVectorRaw::StateVectorRaw", 
"[StateVectorRaw]", float, - double) { - using fp_t = TestType; - - SECTION("StateVectorRaw {std::complex*, size_t}") { - const size_t num_qubits = 4; - auto st_data = createRandomState(re, num_qubits); - StateVectorRaw sv(st_data.data(), st_data.size()); - - REQUIRE(sv.getNumQubits() == 4); - REQUIRE(sv.getData() == st_data.data()); - REQUIRE(sv.getLength() == 16); - } - SECTION("StateVectorRaw {std::complex*, size_t}") { - std::vector> st_data(14, 0.0); - REQUIRE_THROWS(StateVectorRaw(st_data.data(), st_data.size())); - } -} - -TEMPLATE_TEST_CASE("StateVectorRaw::setData", "[StateVectorRaw]", float, - double) { - using fp_t = TestType; - - auto st_data = createRandomState(re, 4); - StateVectorRaw sv(st_data.data(), st_data.size()); - - auto st_data2 = createRandomState(re, 8); - sv.setData(st_data2.data(), st_data2.size()); - - REQUIRE(sv.getNumQubits() == 8); - REQUIRE(sv.getData() == st_data2.data()); - REQUIRE(sv.getLength() == (1U << 8)); -} diff --git a/pennylane_lightning/src/tests/Test_Util.cpp b/pennylane_lightning/src/tests/Test_Util.cpp index 2e804d05e2..8ac67d087a 100644 --- a/pennylane_lightning/src/tests/Test_Util.cpp +++ b/pennylane_lightning/src/tests/Test_Util.cpp @@ -468,7 +468,7 @@ TEMPLATE_TEST_CASE("Utility math functions", "[Util][LinearAlgebra]", float, */ size_t popcount_slow(uint64_t x) { size_t c = 0; - for (; x != 0; x >>= 1) { + for (; x != 0; x >>= 1U) { if ((x & 1U) != 0U) { c++; } @@ -483,8 +483,8 @@ size_t popcount_slow(uint64_t x) { */ size_t ctz_slow(uint64_t x) { size_t c = 0; - while ((x & 1) == 0) { - x >>= 1; + while ((x & 1U) == 0) { + x >>= 1U; c++; } return c; diff --git a/pennylane_lightning/src/tests/Test_VectorJacobianProduct.cpp b/pennylane_lightning/src/tests/Test_VectorJacobianProduct.cpp index babee6b726..e2a876661e 100644 --- a/pennylane_lightning/src/tests/Test_VectorJacobianProduct.cpp +++ b/pennylane_lightning/src/tests/Test_VectorJacobianProduct.cpp @@ -14,7 +14,7 @@ #include "AdjointDiff.hpp" #include 
"JacobianProd.hpp" -#include "StateVectorRaw.hpp" +#include "StateVectorRawCPU.hpp" #include "Util.hpp" #include "TestHelpers.hpp" @@ -53,10 +53,10 @@ TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=RX, Obs=Z dy={0}", for (const auto &p : param) { auto ops = OpsData({"RX"}, {{p}}, {{0}}, {false}); - std::vector> cdata(0b1 << num_qubits); + std::vector> cdata(1U << num_qubits); cdata[0] = std::complex{1, 0}; - StateVectorRaw psi(cdata.data(), cdata.size()); + StateVectorRawCPU psi(cdata.data(), cdata.size()); std::vector tp{0}; std::vector> obs_ls{obs}; @@ -91,10 +91,10 @@ TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=RX, Obs=Z dy={1}", for (const auto &p : param) { auto ops = OpsData({"RX"}, {{p}}, {{0}}, {false}); - std::vector> cdata(0b1 << num_qubits); + std::vector> cdata(1U << num_qubits); cdata[0] = std::complex{1, 0}; - StateVectorRaw psi(cdata.data(), cdata.size()); + StateVectorRawCPU psi(cdata.data(), cdata.size()); std::vector tp{0}; std::vector> obs_ls{obs}; @@ -129,10 +129,10 @@ TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=RX, Obs=Z dy={0.4}", for (const auto &p : param) { auto ops = OpsData({"RX"}, {{p}}, {{0}}, {false}); - std::vector> cdata(0b1 << num_qubits); + std::vector> cdata(1U << num_qubits); cdata[0] = std::complex{1, 0}; - StateVectorRaw psi(cdata.data(), cdata.size()); + StateVectorRawCPU psi(cdata.data(), cdata.size()); std::vector tp{0}; std::vector> obs_ls{obs}; @@ -168,10 +168,10 @@ TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=RY, Obs=X dy={0.4}", for (const auto &p : param) { auto ops = OpsData({"RY"}, {{p}}, {{0}}, {false}); - std::vector> cdata(0b1 << num_qubits); + std::vector> cdata(1U << num_qubits); cdata[0] = std::complex{1, 0}; - StateVectorRaw psi(cdata.data(), cdata.size()); + StateVectorRawCPU psi(cdata.data(), cdata.size()); std::vector tp{0}; std::vector> obs_ls{obs}; @@ -203,8 +203,8 @@ TEST_CASE( std::vector vjp_res(num_params); std::vector dy(num_obs, 1); - 
std::vector> cdata(0b1 << num_qubits); - StateVectorRaw psi(cdata.data(), cdata.size()); + std::vector> cdata(1U << num_qubits); + StateVectorRawCPU psi(cdata.data(), cdata.size()); cdata[0] = std::complex{1, 0}; auto obs1 = ObsDatum({"PauliZ"}, {{}}, {{0}}); @@ -239,8 +239,8 @@ TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=[RX,RX,RX], " std::vector vjp_res(num_params); std::vector dy(num_obs, 0.4); - std::vector> cdata(0b1 << num_qubits); - StateVectorRaw psi(cdata.data(), cdata.size()); + std::vector> cdata(1U << num_qubits); + StateVectorRawCPU psi(cdata.data(), cdata.size()); cdata[0] = std::complex{1, 0}; auto obs1 = ObsDatum({"PauliZ"}, {{}}, {{0}}); @@ -282,8 +282,8 @@ TEST_CASE( std::vector vjp_res(num_params); std::vector dy(num_obs, 1); - std::vector> cdata(0b1 << num_qubits); - StateVectorRaw psi(cdata.data(), cdata.size()); + std::vector> cdata(1U << num_qubits); + StateVectorRawCPU psi(cdata.data(), cdata.size()); cdata[0] = std::complex{1, 0}; auto obs1 = ObsDatum({"PauliZ"}, {{}}, {{0}}); @@ -322,8 +322,8 @@ TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=[RX,RX,RX], " std::vector vjp_res(num_params); std::vector dy(num_obs, 0.4); - std::vector> cdata(0b1 << num_qubits); - StateVectorRaw psi(cdata.data(), cdata.size()); + std::vector> cdata(1U << num_qubits); + StateVectorRawCPU psi(cdata.data(), cdata.size()); cdata[0] = std::complex{1, 0}; auto obs = ObsDatum({"PauliZ", "PauliZ", "PauliZ"}, @@ -361,8 +361,8 @@ TEST_CASE( std::vector vjp_res(num_params); std::vector dy(num_obs, 1); - std::vector> cdata(0b1 << num_qubits); - StateVectorRaw psi(cdata.data(), cdata.size()); + std::vector> cdata(1U << num_qubits); + StateVectorRawCPU psi(cdata.data(), cdata.size()); cdata[0] = std::complex{1, 0}; auto obs = ObsDatum({"PauliX", "PauliX", "PauliX"}, @@ -412,8 +412,8 @@ TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=Mixed, Obs=[XXX], " std::vector vjp_res(num_params); std::vector dy(num_obs, -0.2); - std::vector> 
cdata(0b1 << num_qubits); - StateVectorRaw psi(cdata.data(), cdata.size()); + std::vector> cdata(1U << num_qubits); + StateVectorRawCPU psi(cdata.data(), cdata.size()); cdata[0] = std::complex{1, 0}; auto obs = ObsDatum({"PauliX", "PauliX", "PauliX"}, @@ -480,7 +480,7 @@ TEST_CASE( std::vector> cdata{INVSQRT2(), -INVSQRT2()}; - StateVectorRaw psi(cdata.data(), cdata.size()); + StateVectorRawCPU psi(cdata.data(), cdata.size()); auto obs = ObsDatum({"PauliZ"}, {{}}, {{0}}); auto ops = OpsData( @@ -525,7 +525,7 @@ TEST_CASE( std::vector> cdata{ONE(), ZERO(), ZERO(), ZERO()}; - StateVectorRaw psi(cdata.data(), cdata.size()); + StateVectorRawCPU psi(cdata.data(), cdata.size()); auto obs = ObsDatum({"PauliX", "PauliZ"}, {{}, {}}, {{0}, {1}}); auto ops = OpsData( @@ -584,7 +584,7 @@ TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Mixed Ops, Obs and " std::vector> cdata{ONE(), ZERO(), ZERO(), ZERO()}; - StateVectorRaw psi(cdata.data(), cdata.size()); + StateVectorRawCPU psi(cdata.data(), cdata.size()); auto obs = ObsDatum({"PauliX", "PauliZ"}, {{}, {}}, {{0}, {1}}); auto ops = OpsData( @@ -622,4 +622,4 @@ TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Mixed Ops, Obs and " CHECK(-0.5 * expected[1] == Approx(vjp_res[1]).margin(1e-7)); CHECK(-0.5 * expected[2] == Approx(vjp_res[2]).margin(1e-7)); } -} \ No newline at end of file +} diff --git a/pennylane_lightning/src/util/Memory.hpp b/pennylane_lightning/src/util/Memory.hpp index dcd8dd359d..6a6df1f1ce 100644 --- a/pennylane_lightning/src/util/Memory.hpp +++ b/pennylane_lightning/src/util/Memory.hpp @@ -46,9 +46,7 @@ template struct AlignedAllocator { if (size == 0) { return nullptr; } - void *p = std::aligned_alloc( - alignment, - sizeof(T) * size); // NOLINT(cppcoreguidelines-owning-memory) + void *p = std::aligned_alloc(alignment, sizeof(T) * size); if (p == nullptr) { throw std::bad_alloc(); } @@ -56,8 +54,8 @@ template struct AlignedAllocator { } void deallocate(T *p, [[maybe_unused]] std::size_t size) 
noexcept { - std::free( - p); // NOLINT(hicpp-no-malloc, cppcoreguidelines-owning-memory) + // NOLINTNEXTLINE(hicpp-no-malloc) + std::free(p); } template void construct(U *ptr) { ::new ((void *)ptr) U(); } From ff394e57522d4a83d5cb828c407418caea3b7381 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Mon, 28 Feb 2022 23:25:05 -0500 Subject: [PATCH 03/94] Export to python --- pennylane_lightning/_serialize.py | 14 +- pennylane_lightning/lightning_qubit.py | 46 +- pennylane_lightning/src/bindings/Bindings.cpp | 56 +-- pennylane_lightning/src/bindings/Bindings.hpp | 186 +++++-- pennylane_lightning/src/gates/Constant.hpp | 27 +- .../src/gates/GateOperation.hpp | 16 +- .../src/gates/OpToMemberFuncPtr.hpp | 65 ++- .../cpu_kernels/GateImplementationsLM.hpp | 78 ++- .../cpu_kernels/GateImplementationsPI.hpp | 179 ++++++- .../src/simulator/CPUMemoryModel.hpp | 89 ++++ .../DefaultKernelsForStateVector.hpp | 458 +++++++++++++----- .../src/simulator/DispatchKeys.hpp | 30 +- .../src/simulator/DynamicDispatcher.cpp | 75 ++- .../src/simulator/DynamicDispatcher.hpp | 137 +++++- .../src/simulator/StateVectorBase.hpp | 93 ++-- .../src/simulator/StateVectorCPU.hpp | 21 +- .../src/simulator/StateVectorManagedCPU.hpp | 27 +- .../Test_DefaultKernelsForStateVector.cpp | 71 +++ .../src/tests/Test_DynamicDispatcher.cpp | 10 +- ...est_GateImplementations_CompareKernels.cpp | 61 ++- .../Test_GateImplementations_Inverse.cpp | 31 +- .../tests/Test_GateImplementations_Matrix.cpp | 256 +++++++--- .../src/tests/Test_OpToMemberFuncPtr.cpp | 11 +- pennylane_lightning/src/util/ConstantUtil.hpp | 7 + .../src/util/IntegerInterval.hpp | 94 ++++ pennylane_lightning/src/util/Memory.hpp | 12 +- pennylane_lightning/src/util/Util.hpp | 10 + tests/test_adjoint_jacobian.py | 14 +- tests/test_apply.py | 38 -- tests/test_array.py | 34 ++ tests/test_serialize.py | 40 -- tests/test_vjp.py | 60 +-- 32 files changed, 1619 insertions(+), 727 deletions(-) create mode 100644 
pennylane_lightning/src/simulator/CPUMemoryModel.hpp create mode 100644 pennylane_lightning/src/util/IntegerInterval.hpp create mode 100644 tests/test_array.py diff --git a/pennylane_lightning/_serialize.py b/pennylane_lightning/_serialize.py index e82e121d81..db7d78cb8e 100644 --- a/pennylane_lightning/_serialize.py +++ b/pennylane_lightning/_serialize.py @@ -40,18 +40,6 @@ pass -def _is_lightning_gate(gate_name): - """Returns True if the gate (besides Matrix) is implemented - and exported from lightning. - - Args: - gate_name (str): the name of gate - """ - if gate_name == "Matrix": - return False - return gate_name in DEFAULT_KERNEL_FOR_OPS - - def _obs_has_kernel(obs: Observable) -> bool: """Returns True if the input observable has a supported kernel in the C++ backend. @@ -167,7 +155,7 @@ def _serialize_ops( name = single_op.name if not is_inverse else single_op.name[:-4] names.append(name) - if not _is_lightning_gate(name): + if getattr(StateVectorC128, name, None) is None: params.append([]) mats.append(single_op.matrix) diff --git a/pennylane_lightning/lightning_qubit.py b/pennylane_lightning/lightning_qubit.py index f6dd12a3fe..014e4acb51 100644 --- a/pennylane_lightning/lightning_qubit.py +++ b/pennylane_lightning/lightning_qubit.py @@ -46,11 +46,13 @@ StateVectorC128, AdjointJacobianC128, VectorJacobianProductC128, - DEFAULT_KERNEL_FOR_OPS, - EXPORTED_KERNEL_OPS, + allocate_aligned_array, + get_alignment, + best_alignment, + CPUMemoryModel, ) - from ._serialize import _serialize_obs, _serialize_ops, _is_lightning_gate + from ._serialize import _serialize_obs, _serialize_ops CPP_BINARY_AVAILABLE = True except ModuleNotFoundError: @@ -101,25 +103,23 @@ class LightningQubit(DefaultQubit): _CPP_BINARY_AVAILABLE = True def __init__(self, wires, *, kernel_for_ops=None, shots=None, batch_obs=False): - self._kernel_for_ops = DEFAULT_KERNEL_FOR_OPS - if kernel_for_ops is not None: - if not isinstance(kernel_for_ops, dict): - raise ValueError("Argument 
kernel_for_ops must be a dictionary.") - - for gate_op, kernel in kernel_for_ops.items(): - if (kernel, gate_op) not in EXPORTED_KERNEL_OPS: - raise ValueError( - f"The given kernel {kernel} does not implement {gate_op} gate." - ) - self._kernel_for_ops[gate_op] = kernel - super().__init__(wires, shots=shots) self._batch_obs = batch_obs - # Lightning keeps a simulator memory of which is managed by C++ - # Note that as C++ manages the data, we need to copy from this array when - # the result is used outside of the module - self.sim_ = None + @staticmethod + def _asarray(arr, dtype=None): + arr = np.asarray(arr) + if not dtype: + dtype = arr.dtype + + # We allocate new aligned memory and copy the data there if the alignment or dtype mismatches. + # Note that get_alignment does not necessarily return CPUMemoryModel(Unaligned) even for + # numpy allocated memory, as the memory location may happen to be aligned. + if int(get_alignment(arr)) < int(best_alignment()) or arr.dtype != dtype: + new_arr = allocate_aligned_array(arr.size, np.dtype(dtype)).reshape(arr.shape) + np.copyto(new_arr, arr) + arr = new_arr + return arr @classmethod def capabilities(cls): @@ -195,17 +195,13 @@ def apply_lightning(self, state, operations, dtype=np.complex128): for o in operations: name = o.name.split(".")[0] # The split is because inverse gates have .inv appended - if _is_lightning_gate(name): - kernel = self._kernel_for_ops[name] - method = getattr(sim, f"{name}_{kernel}".format(), None) - else: - method = None + method = getattr(sim, name, None) wires = self.wires.indices(o.wires) if method is None: # Inverse can be set to False since o.matrix is already in inverted form - method = getattr(sim, "applyMatrix_{}".format(self._kernel_for_ops["Matrix"])) + method = getattr(sim, "applyMatrix") method(o.matrix, wires, False) else: inv = o.inverse diff --git a/pennylane_lightning/src/bindings/Bindings.cpp b/pennylane_lightning/src/bindings/Bindings.cpp index c7747c6016..b935734087 100644 ---
a/pennylane_lightning/src/bindings/Bindings.cpp +++ b/pennylane_lightning/src/bindings/Bindings.cpp @@ -19,6 +19,7 @@ #include "GateUtil.hpp" #include "SelectKernel.hpp" +#include "StateVectorManagedCPU.hpp" #include "pybind11/pybind11.h" @@ -27,7 +28,7 @@ namespace { using namespace Pennylane::Algorithms; using namespace Pennylane::Gates; -using Pennylane::StateVectorRaw; +using Pennylane::StateVectorRawCPU; using std::complex; using std::string; @@ -45,7 +46,7 @@ namespace py = pybind11; * @param m Pybind11 module. */ template -void lightning_class_bindings(py::module &m) { +void lightning_class_bindings(py::module_ &m) { // Enable module name to be based on size of complex datatype const std::string bitsize = std::to_string(sizeof(std::complex) * 8); @@ -53,13 +54,14 @@ void lightning_class_bindings(py::module &m) { //***********************************************************************// // StateVector //***********************************************************************// - + // std::string class_name = "StateVectorC" + bitsize; auto pyclass = - py::class_>(m, class_name.c_str()); - pyclass.def(py::init(&create)); + py::class_>(m, class_name.c_str()); + pyclass.def(py::init(&createRaw)); - registerKernelsToPyexport(pyclass); + registerGatesForStateVector>(pyclass); //***********************************************************************// // Observable @@ -221,7 +223,7 @@ void lightning_class_bindings(py::module &m) { .def("adjoint_jacobian", &AdjointJacobian::adjointJacobian) .def("adjoint_jacobian", [](AdjointJacobian &adj, - const StateVectorRaw &sv, + const StateVectorRawCPU &sv, const std::vector> &observables, const OpsData &operations, const std::vector &trainableParams, size_t num_params) { @@ -292,7 +294,7 @@ void lightning_class_bindings(py::module &m) { auto fn = v.vectorJacobianProduct(dy, num_params); return py::cpp_function( [fn, num_params]( - const StateVectorRaw &sv, + const StateVectorRawCPU &sv, const std::vector> &observables, const 
OpsData &operations, const std::vector &trainableParams) { @@ -309,7 +311,7 @@ void lightning_class_bindings(py::module &m) { class_name = "MeasuresC" + bitsize; py::class_>(m, class_name.c_str()) - .def(py::init &>()) + .def(py::init &>()) .def("probs", [](Measures &M, const std::vector &wires) { if (wires.empty()) { @@ -362,29 +364,19 @@ PYBIND11_MODULE(lightning_qubit_ops, // NOLINT: No control over Pybind internals &Gates::getIndicesAfterExclusion), "Get statevector indices for gate application"); - /* Add EXPORTED_KERNELS */ - std::vector> exported_kernel_ops; - - for (const auto kernel : kernels_to_pyexport) { - const auto kernel_name = lookup(kernel_id_name_pairs, kernel); - const auto implemented_gates = implementedGatesForKernel(kernel); - for (const auto gate_op : implemented_gates) { - const auto gate_name = - std::string(lookup(Constant::gate_names, gate_op)); - exported_kernel_ops.emplace_back(kernel_name, gate_name); - } - } - - m.attr("EXPORTED_KERNEL_OPS") = py::cast(exported_kernel_ops); - - /* Add DEFAULT_KERNEL_FOR_OPS */ - std::map default_kernel_ops_map; - for (const auto &[gate_op, name] : Constant::gate_names) { - const auto kernel = lookup(Constant::default_kernel_for_gates, gate_op); - const auto kernel_name = Util::lookup(kernel_id_name_pairs, kernel); - default_kernel_ops_map.emplace(std::string(name), kernel_name); - } - m.attr("DEFAULT_KERNEL_FOR_OPS") = py::cast(default_kernel_ops_map); + /* Add CPUMemoryModel enum class */ + py::enum_(m, "CPUMemoryModel") + .value("Unaligned", CPUMemoryModel::Unaligned) + .value("Aligned256", CPUMemoryModel::Aligned256) + .value("Aligned512", CPUMemoryModel::Aligned512); + + /* Add array */ + m.def("allocate_aligned_array", &allocateAlignedArray, + "Get numpy array whose underlying data is aligned."); + m.def("get_alignment", &getNumpyArrayAlignment, + "Get alignment of an underlying data for a numpy array."); + m.def("best_alignment", &bestCPUMemoryModel, + "Best memory alignment. 
for the simulator."); lightning_class_bindings(m); lightning_class_bindings(m); diff --git a/pennylane_lightning/src/bindings/Bindings.hpp b/pennylane_lightning/src/bindings/Bindings.hpp index a1845ba67d..84ef5f806c 100644 --- a/pennylane_lightning/src/bindings/Bindings.hpp +++ b/pennylane_lightning/src/bindings/Bindings.hpp @@ -18,10 +18,12 @@ */ #pragma once #include "AdjointDiff.hpp" +#include "CPUMemoryModel.hpp" #include "JacobianProd.hpp" #include "Measures.hpp" +#include "Memory.hpp" #include "OpToMemberFuncPtr.hpp" -#include "StateVectorRaw.hpp" +#include "StateVectorManagedCPU.hpp" #include "pybind11/complex.h" #include "pybind11/functional.h" @@ -45,8 +47,8 @@ namespace Pennylane { * @return StateVector `%StateVector` object. */ template -static auto create(pybind11::array_t> &numpyArray) - -> StateVectorRaw { +auto createRaw(const pybind11::array_t> &numpyArray) + -> StateVectorRawCPU { pybind11::buffer_info numpyArrayInfo = numpyArray.request(); if (numpyArrayInfo.ndim != 1) { @@ -59,15 +61,86 @@ static auto create(pybind11::array_t> &numpyArray) } auto *data_ptr = static_cast *>(numpyArrayInfo.ptr); - return StateVectorRaw( + return StateVectorRawCPU( {data_ptr, static_cast(numpyArrayInfo.shape[0])}); } template -static auto toNumpyArray(const StateVectorCPU &sv) - -> py::array_t> { - return py::array_t>( - {sv.getLength()}, {sizeof(PrecisionT)} sv.getData(), ); +auto createManaged( + const pybind11::array_t> &numpyArray) + -> StateVectorManagedCPU { + pybind11::buffer_info numpyArrayInfo = numpyArray.request(); + + if (numpyArrayInfo.ndim != 1) { + throw std::invalid_argument( + "NumPy array must be a 1-dimensional array"); + } + if (numpyArrayInfo.itemsize != sizeof(std::complex)) { + throw std::invalid_argument( + "NumPy array must be of type np.complex64 or np.complex128"); + } + auto *data_ptr = + static_cast *>(numpyArrayInfo.ptr); + return StateVectorManagedCPU( + {data_ptr, static_cast(numpyArrayInfo.shape[0])}); +} + +template +auto 
toNumpyArray(const StateVectorManagedCPU &sv) + -> pybind11::array_t> { + return pybind11::array_t>( + {sv.getLength()}, {2 * sizeof(PrecisionT)}, sv.getData()); +} + +auto getNumpyArrayAlignment(const pybind11::array &numpyArray) + -> CPUMemoryModel { + return getMemoryModel(numpyArray.request().ptr); +} + +void deallocateArray(void *ptr) { std::free(ptr); } + +/** + * @brief We return an numpy array whose underlying data is allocated by + * lightning. + * + * See https://github.com/pybind/pybind11/issues/1042#issuecomment-325941022 + * for capsule usage. + */ +auto allocateAlignedArray(size_t size, pybind11::dtype dt) -> pybind11::array { + + auto memory_model = bestCPUMemoryModel(); + + if (dt.is(pybind11::dtype::of())) { + void *ptr = std::aligned_alloc(getAlignment(memory_model), + sizeof(float) * size); + auto capsule = pybind11::capsule(ptr, &deallocateArray); + + return pybind11::array{dt, {size}, {sizeof(float)}, ptr, capsule}; + } else if (dt.is(pybind11::dtype::of())) { + void *ptr = std::aligned_alloc(getAlignment(memory_model), + sizeof(double) * size); + auto capsule = pybind11::capsule(ptr, &deallocateArray); + + return pybind11::array{dt, {size}, {sizeof(double)}, ptr, capsule}; + } else if (dt.is(pybind11::dtype::of>())) { + void *ptr = + std::aligned_alloc(getAlignment>(memory_model), + sizeof(std::complex) * size); + auto capsule = pybind11::capsule(ptr, &deallocateArray); + + return pybind11::array{ + dt, {size}, {sizeof(std::complex)}, ptr, capsule}; + } else if (dt.is(pybind11::dtype::of>())) { + void *ptr = + std::aligned_alloc(getAlignment>(memory_model), + sizeof(std::complex) * size); + auto capsule = pybind11::capsule(ptr, &deallocateArray); + + return pybind11::array{ + dt, {size}, {sizeof(std::complex)}, ptr, capsule}; + } else { + throw pybind11::type_error("Unsupported datatype."); + } } /** @@ -87,7 +160,7 @@ void apply(pybind11::array_t> &stateNumpyArray, const std::vector> &wires, const std::vector &inverse, const std::vector> ¶ms) 
{ - auto state = create(stateNumpyArray); + auto state = createRaw(stateNumpyArray); state.applyOperations(ops, wires, inverse, params); } @@ -103,6 +176,7 @@ void apply(pybind11::array_t> &stateNumpyArray, * @tparam kernel Kernel to register * @tparam gate_op Gate operation */ +/* template constexpr auto getLambdaForKernelGateOp() { @@ -115,16 +189,14 @@ constexpr auto getLambdaForKernelGateOp() { if constexpr (gate_op != GateOperation::Matrix) { return - [](StateVectorRaw &st, const std::vector &wires, - bool inverse, const std::vector ¶ms) { - constexpr auto func_ptr = - GateOpToMemberFuncPtr::value; - callGateOps(func_ptr, st.getData(), st.getNumQubits(), wires, + [](StateVectorRawCPU &st, const std::vector +&wires, bool inverse, const std::vector ¶ms) { constexpr auto +func_ptr = GateOpToMemberFuncPtr::value; callGateOps(func_ptr, st.getData(), st.getNumQubits(), wires, inverse, params); }; } else { - return [](StateVectorRaw &st, + return [](StateVectorRawCPU &st, const py::array_t, py::array::c_style | py::array::forcecast> &matrix, @@ -135,7 +207,8 @@ constexpr auto getLambdaForKernelGateOp() { }; } }; - +*/ +/* /// @cond DEV template @@ -154,7 +227,7 @@ constexpr auto getGateOpLambdaPairsIter() { } } /// @endcond - +*/ /** * @brief Create a tuple of lambda functions to bind * @@ -162,10 +235,12 @@ constexpr auto getGateOpLambdaPairsIter() { * @tparam ParamT Floating point type of gate parameters * @tparam kernel Kernel to register */ +/* template constexpr auto getGateOpLambdaPairs() { return getGateOpLambdaPairsIter(); } +*/ /** * @brief For given kernel, register all implemented gate operations and apply @@ -176,17 +251,13 @@ constexpr auto getGateOpLambdaPairs() { * @tparam Kernel Kernel to register * @tparam PyClass Pybind11 class type */ -template +/* +template void registerImplementedGatesForKernel(PyClass &pyclass) { using namespace Pennylane::Gates; - const auto kernel_name = std::string(SelectKernel::name); - - constexpr auto gate_op_lambda_pairs 
= - getGateOpLambdaPairs(); auto registerToPyclass = - [&pyclass, &kernel_name](auto &&gate_op_lambda_pair) -> GateOperation { + [&pyclass](auto &&gate_op_lambda_pair) -> GateOperation { const auto &[gate_op, func] = gate_op_lambda_pair; if (gate_op == GateOperation::Matrix) { const std::string name = "applyMatrix_" + kernel_name; @@ -195,10 +266,14 @@ void registerImplementedGatesForKernel(PyClass &pyclass) { } else { const auto gate_name = std::string(lookup(Constant::gate_names, gate_op)); - const std::string name = gate_name + "_" + kernel_name; - const std::string doc = "Apply the " + gate_name + " gate using " + - kernel_name + " kernel."; - pyclass.def(name.c_str(), func, doc.c_str()); + const std::string doc = "Apply the " + gate_name + " gate."; + auto func = [&gate_name](StateVectorManagedCPU& sv, + const std::vector &wires, + bool inverse, + const std::vector ¶ms) { + sv.applyOperation(gate_name, wires, inverse, params); + } + pyclass.def(name.c_str(), , doc.c_str()); } return gate_op; }; @@ -209,29 +284,40 @@ void registerImplementedGatesForKernel(PyClass &pyclass) { }, gate_op_lambda_pairs); } - +*/ /// @cond DEV -template -void registerKernelsToPyexportIter(PyClass &pyclass) { - if constexpr (kernel_idx < kernels_to_pyexport.size()) { - constexpr auto kernel = kernels_to_pyexport[kernel_idx]; - registerImplementedGatesForKernel(pyclass); - registerKernelsToPyexportIter( - pyclass); +template +void registerGatesForStateVector(PyClass &pyclass) { + using Gates::GateOperation; + namespace Constant = Gates::Constant; + + static_assert(std::is_same_v); + + { // Register matrix + const std::string doc = "Apply a given matrix to wires."; + auto func = + [](SVType &st, + const pybind11::array_t, + pybind11::array::c_style | + pybind11::array::forcecast> &matrix, + const std::vector &wires, bool inverse = false) { + st.applyMatrix(static_cast *>( + matrix.request().ptr), + wires, inverse); + }; + pyclass.def("applyMatrix", func, doc.c_str()); } -} -/// 
@endcond -/** - * @brief register gates for each kernel in kernels_to_pyexport - * - * - * @tparam PrecisionT Floating point precision of underlying statevector data - * @tparam ParamT Floating point type of gate parameters - * @tparam PyClass Pyclass type - */ -template -void registerKernelsToPyexport(PyClass &pyclass) { - registerKernelsToPyexportIter(pyclass); + Util::for_each_enum([&pyclass](GateOperation gate_op) { + const auto gate_name = + std::string(lookup(Constant::gate_names, gate_op)); + const std::string doc = "Apply the " + gate_name + " gate."; + auto func = [gate_name = gate_name]( + SVType &sv, const std::vector &wires, + bool inverse, const std::vector ¶ms) { + sv.applyOperation(gate_name, wires, inverse, params); + }; + pyclass.def(gate_name.c_str(), func, doc.c_str()); + }); } } // namespace Pennylane diff --git a/pennylane_lightning/src/gates/Constant.hpp b/pennylane_lightning/src/gates/Constant.hpp index 38d086875d..62f0859829 100644 --- a/pennylane_lightning/src/gates/Constant.hpp +++ b/pennylane_lightning/src/gates/Constant.hpp @@ -25,14 +25,19 @@ namespace Pennylane::Gates::Constant { /** * @brief List of multi-qubit gates */ -[[maybe_unused]] constexpr std::array multi_qubit_gates{GateOperation::MultiRZ, - GateOperation::Matrix}; +[[maybe_unused]] constexpr std::array multi_qubit_gates{GateOperation::MultiRZ}; /** * @brief List of multi-qubit generators */ [[maybe_unused]] constexpr std::array multi_qubit_generators{ GeneratorOperation::MultiRZ, }; +/** + * @brief List of multi-qubit matrix operation + */ +[[maybe_unused]] constexpr std::array multi_qubit_matrix_ops{ + MatrixOperation::MultiQubitOp, +}; /** * @brief Gate names @@ -71,9 +76,7 @@ namespace Pennylane::Gates::Constant { "Toffoli"}, std::pair{GateOperation::CSWAP, "CSWAP"}, std::pair{GateOperation::MultiRZ, - "MultiRZ"}, - std::pair{GateOperation::Matrix, "Matrix"}, -}; + "MultiRZ"}}; /** * @brief Generator names. 
* @@ -108,6 +111,19 @@ namespace Pennylane::Gates::Constant { "GeneratorMultiRZ"}, }; +/** + * @brief Matrix names. + * + */ +[[maybe_unused]] constexpr std::array matrix_names = { + std::pair{MatrixOperation::SingleQubitOp, + "SingleQubitOp"}, + std::pair{MatrixOperation::TwoQubitOp, + "TwoQubitOp"}, + std::pair{MatrixOperation::MultiQubitOp, + "MultiQubitOp"}, +}; + /** * @brief Number of wires for gates besides multi-qubit gates */ @@ -236,7 +252,6 @@ namespace Pennylane::Gates::Constant { std::pair{GateOperation::Toffoli, KernelType::PI}, std::pair{GateOperation::CSWAP, KernelType::PI}, std::pair{GateOperation::MultiRZ, KernelType::LM}, - std::pair{GateOperation::Matrix, KernelType::PI}, }; /** * @brief Define which kernel to use for each generator operation. diff --git a/pennylane_lightning/src/gates/GateOperation.hpp b/pennylane_lightning/src/gates/GateOperation.hpp index 24d17d4406..709bf3f459 100644 --- a/pennylane_lightning/src/gates/GateOperation.hpp +++ b/pennylane_lightning/src/gates/GateOperation.hpp @@ -56,13 +56,11 @@ enum class GateOperation : uint32_t { CSWAP, /* Mutli-qubit gates */ MultiRZ, - /* General matrix */ - Matrix, /* END (placeholder) */ END }; /** - * @brief Enum class of all gate generators + * @brief Enum class for all gate generators */ enum class GeneratorOperation : uint32_t { BEGIN = 0, @@ -82,4 +80,16 @@ enum class GeneratorOperation : uint32_t { /* END (placeholder) */ END }; + +/** + * @brief Enum class for matrix operation + */ +enum class MatrixOperation : uint32_t { + BEGIN = 0, + SingleQubitOp = 0, + TwoQubitOp, + MultiQubitOp, + /* END (placeholder) */ + END +}; } // namespace Pennylane::Gates diff --git a/pennylane_lightning/src/gates/OpToMemberFuncPtr.hpp b/pennylane_lightning/src/gates/OpToMemberFuncPtr.hpp index 05808dd364..49c1611bb9 100644 --- a/pennylane_lightning/src/gates/OpToMemberFuncPtr.hpp +++ b/pennylane_lightning/src/gates/OpToMemberFuncPtr.hpp @@ -33,11 +33,8 @@ namespace Pennylane::Gates { template struct 
GateOpToMemberFuncPtr { - // raises compile error when used - static_assert( - gate_op != GateOperation::Matrix, - "GateOpToMemberFuncPtr is not defined for GateOperation::Matrix."); - static_assert(gate_op == GateOperation::Matrix, + // raises compile error when this struct is instantiated. + static_assert(sizeof(PrecisionT) == -1, "GateOpToMemberFuncPtr is not defined for the given gate. " "When you define a new GateOperation, check that you also " "have added the corresponding entry in " @@ -210,7 +207,7 @@ struct GateOpToMemberFuncPtr struct GeneratorOpToMemberFuncPtr { - // raises compile error when used + // raises compile error when this struct is instantiated. static_assert( sizeof(GateImplementation) == -1, "GeneratorOpToMemberFuncPtr is not defined for the given generator. " @@ -292,6 +289,33 @@ struct GeneratorOpToMemberFuncPtr; }; +/** + * @brief Matrix operation to member function pointer + */ +template +struct MatrixOpToMemberFuncPtr { + static_assert(sizeof(PrecisionT) == -1, "Unrecognized matrix operation"); +}; + +template +struct MatrixOpToMemberFuncPtr { + constexpr static auto value = + &GateImplementation::template applySingleQubitOp; +}; +template +struct MatrixOpToMemberFuncPtr { + constexpr static auto value = + &GateImplementation::template applyTwoQubitOp; +}; +template +struct MatrixOpToMemberFuncPtr { + constexpr static auto value = + &GateImplementation::template applyMultiQubitOp; +}; + /// @cond DEV namespace Internal { /** @@ -371,6 +395,15 @@ template struct GeneratorFuncPtr { using Type = PrecisionT (*)(std::complex *, size_t, const std::vector &, bool); }; + +/** + * @brief Pointer type for a matrix operation + */ +template struct MatrixFuncPtr { + using Type = void (*)(std::complex *, size_t, + const std::complex *, + const std::vector &, bool); +}; } // namespace Internal /// @endcond @@ -382,11 +415,17 @@ using GateFuncPtrT = typename Internal::GateFuncPtr::Type; /** - * @brief Convenient type alias for GeneratorFuncPtrT. 
+ * @brief Convenient type alias for GeneratorFuncPtr. */ template using GeneratorFuncPtrT = typename Internal::GeneratorFuncPtr::Type; +/** + * @brief Convinient type alias for MatrixfuncPtr. + */ +template +using MatrixFuncPtrT = typename Internal::MatrixFuncPtr::Type; + /** * @defgroup Call gate operation with provided arguments * @@ -449,4 +488,16 @@ inline PrecisionT callGeneratorOps(GeneratorFuncPtrT func, const std::vector &wires, bool adj) { return func(data, num_qubits, wires, adj); } + +/** + * @brief Call a matrix operation. + * @tparam PrecisionT Floating point type for the state-vector. + */ +template +inline void callMatrixOp(MatrixFuncPtrT func, + std::complex *data, size_t num_qubits, + const std::complex matrix, + const std::vector &wires, bool adj) { + return func(data, num_qubits, matrix, wires, adj); +} } // namespace Pennylane::Gates diff --git a/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsLM.hpp b/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsLM.hpp index 6e0060fc54..5618bffc7b 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsLM.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsLM.hpp @@ -65,8 +65,7 @@ class GateImplementationsLM : public PauliGenerator { GateOperation::CRX, GateOperation::CRY, GateOperation::CRZ, GateOperation::CRot, GateOperation::IsingXX, GateOperation::IsingYY, - GateOperation::IsingZZ, GateOperation::MultiRZ, - GateOperation::Matrix}; + GateOperation::IsingZZ, GateOperation::MultiRZ}; constexpr static std::array implemented_generators = { GeneratorOperation::RX, @@ -83,6 +82,10 @@ class GateImplementationsLM : public PauliGenerator { GeneratorOperation::MultiRZ, }; + constexpr static std::array implemented_matrices = { + MatrixOperation::SingleQubitOp, MatrixOperation::TwoQubitOp, + MatrixOperation::MultiQubitOp}; + /** * @brief Apply a single qubit gate to the statevector. 
* @@ -95,9 +98,10 @@ class GateImplementationsLM : public PauliGenerator { template static inline void applySingleQubitOp(std::complex *arr, size_t num_qubits, - const std::complex *matrix, size_t wire, - bool inverse = false) { - const size_t rev_wire = num_qubits - wire - 1; + const std::complex *matrix, + const std::vector &wires, bool inverse = false) { + assert(wires.size() == 1); + const size_t rev_wire = num_qubits - wires[0] - 1; const size_t rev_wire_shift = (static_cast(1U) << rev_wire); const size_t wire_parity = fillTrailingOnes(rev_wire); const size_t wire_parity_inv = fillLeadingOnes(rev_wire + 1); @@ -146,6 +150,7 @@ class GateImplementationsLM : public PauliGenerator { applyTwoQubitOp(std::complex *arr, size_t num_qubits, const std::complex *matrix, const std::vector &wires, bool inverse = false) { + assert(wires.size() == 2); const size_t rev_wire0 = num_qubits - wires[1] - 1; const size_t rev_wire1 = num_qubits - wires[0] - 1; // Control qubit @@ -242,47 +247,38 @@ class GateImplementationsLM : public PauliGenerator { } template - static void applyMatrix(std::complex *arr, size_t num_qubits, - const std::complex *matrix, - const std::vector &wires, bool inverse) { + static void + applyMultiQubitOp(std::complex *arr, size_t num_qubits, + const std::complex *matrix, + const std::vector &wires, bool inverse) { assert(num_qubits >= wires.size()); - switch (wires.size()) { - case 1: - applySingleQubitOp(arr, num_qubits, matrix, wires[0], inverse); - break; - case 2: - applyTwoQubitOp(arr, num_qubits, matrix, wires, inverse); - break; - default: { - size_t dim = 1U << wires.size(); - std::vector indices; - indices.resize(dim); - - for (size_t k = 0; k < Util::exp2(num_qubits); k += dim) { - std::vector> coeffs_in(dim); - std::vector> coeffs_out(dim); - - for (size_t inner_idx = 0; inner_idx < dim; inner_idx++) { - size_t idx = k | inner_idx; - size_t n_wires = wires.size(); - for (size_t pos = 0; pos < n_wires; pos++) { - idx = bitswap(idx, n_wires - 
pos - 1, - num_qubits - wires[pos] - 1); - } - indices[inner_idx] = idx; - coeffs_in[inner_idx] = arr[idx]; - } + size_t dim = 1U << wires.size(); + std::vector indices; + indices.resize(dim); - Util::matrixVecProd( - matrix, coeffs_in.data(), coeffs_out.data(), dim, dim, - inverse ? Trans::Adjoint : Trans::NoTranspose); + for (size_t k = 0; k < Util::exp2(num_qubits); k += dim) { + std::vector> coeffs_in(dim); + std::vector> coeffs_out(dim); - for (size_t inner_idx = 0; inner_idx < dim; inner_idx++) { - arr[indices[inner_idx]] = coeffs_out[inner_idx]; + for (size_t inner_idx = 0; inner_idx < dim; inner_idx++) { + size_t idx = k | inner_idx; + size_t n_wires = wires.size(); + for (size_t pos = 0; pos < n_wires; pos++) { + idx = bitswap(idx, n_wires - pos - 1, + num_qubits - wires[pos] - 1); } + indices[inner_idx] = idx; + coeffs_in[inner_idx] = arr[idx]; + } + + Util::matrixVecProd(matrix, coeffs_in.data(), coeffs_out.data(), + dim, dim, + inverse ? Trans::Adjoint : Trans::NoTranspose); + + for (size_t inner_idx = 0; inner_idx < dim; inner_idx++) { + arr[indices[inner_idx]] = coeffs_out[inner_idx]; } - } } } @@ -520,7 +516,7 @@ class GateImplementationsLM : public PauliGenerator { (inverse) ? 
Gates::getRot(-omega, -theta, -phi) : Gates::getRot(phi, theta, omega); - applySingleQubitOp(arr, num_qubits, rotMat.data(), wires[0]); + applySingleQubitOp(arr, num_qubits, rotMat.data(), wires); } /* Two-qubit gates */ diff --git a/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsPI.hpp b/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsPI.hpp index 82a0edf924..b4314411f5 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsPI.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsPI.hpp @@ -68,8 +68,8 @@ class GateImplementationsPI : public PauliGenerator { GateOperation::IsingZZ, GateOperation::CRX, GateOperation::CRY, GateOperation::CRZ, GateOperation::CRot, GateOperation::Toffoli, - GateOperation::CSWAP, GateOperation::MultiRZ, - GateOperation::Matrix}; + GateOperation::CSWAP, GateOperation::MultiRZ}; + constexpr static std::array implemented_generators = { GeneratorOperation::RX, GeneratorOperation::RY, @@ -83,6 +83,153 @@ class GateImplementationsPI : public PauliGenerator { GeneratorOperation::CRZ, GeneratorOperation::ControlledPhaseShift}; + constexpr static std::array implemented_matrices = { + MatrixOperation::SingleQubitOp, MatrixOperation::TwoQubitOp, + MatrixOperation::MultiQubitOp}; + + /** + * @brief Apply a single qubit gate to the statevector. + * + * @param arr Pointer to the statevector. + * @param num_qubits Number of qubits. + * @param matrix Perfect square matrix in row-major order. + * @param wires Wires the gate applies to. + * @param inverse Indicate whether inverse should be taken. 
+ */ + template + static inline void + applySingleQubitOp(std::complex *arr, size_t num_qubits, + const std::complex *matrix, + const std::vector &wires, bool inverse = false) { + assert(wires.size() == 1); + + const auto [indices, externalIndices] = GateIndices(wires, num_qubits); + + if (inverse) { + for (const size_t &externalIndex : externalIndices) { + std::complex *shiftedState = arr + externalIndex; + const std::complex v0 = shiftedState[indices[0]]; + const std::complex v1 = shiftedState[indices[1]]; + shiftedState[indices[0]] = + std::conj(matrix[0B00]) * v0 + + std::conj(matrix[0B10]) * + v1; // NOLINT(readability-magic-numbers) + shiftedState[indices[1]] = + std::conj(matrix[0B01]) * v0 + + std::conj(matrix[0B11]) * + v1; // NOLINT(readability-magic-numbers) + } + } else { + for (const size_t &externalIndex : externalIndices) { + std::complex *shiftedState = arr + externalIndex; + const std::complex v0 = shiftedState[indices[0]]; + const std::complex v1 = shiftedState[indices[1]]; + shiftedState[indices[0]] = + matrix[0B00] * v0 + + matrix[0B01] * v1; // NOLINT(readability-magic-numbers) + shiftedState[indices[1]] = + matrix[0B10] * v0 + + matrix[0B11] * v1; // NOLINT(readability-magic-numbers) + } + } + } + + /** + * @brief Apply a two qubit gate to the statevector. + * + * @param arr Pointer to the statevector. + * @param num_qubits Number of qubits. + * @param matrix Perfect square matrix in row-major order. + * @param wires Wires the gate applies to. + * @param inverse Indicate whether inverse should be taken. 
+ */ + template + static inline void + applyTwoQubitOp(std::complex *arr, size_t num_qubits, + const std::complex *matrix, + const std::vector &wires, bool inverse = false) { + assert(wires.size() == 2); + const auto [indices, externalIndices] = GateIndices(wires, num_qubits); + + if (inverse) { + for (const size_t &externalIndex : externalIndices) { + std::complex *shiftedState = arr + externalIndex; + + const std::complex v00 = shiftedState[indices[0]]; + const std::complex v01 = shiftedState[indices[1]]; + const std::complex v10 = shiftedState[indices[2]]; + const std::complex v11 = shiftedState[indices[3]]; + + // NOLINTNEXTLINE(readability-magic-numbers) + shiftedState[indices[0]] = + std::conj(matrix[0b0000]) * v00 + + // NOLINTNEXTLINE(readability-magic-numbers) + std::conj(matrix[0b0100]) * v01 + + // NOLINTNEXTLINE(readability-magic-numbers) + std::conj(matrix[0b1000]) * v10 + + // NOLINTNEXTLINE(readability-magic-numbers) + std::conj(matrix[0b1100]) * v11; + // NOLINTNEXTLINE(readability-magic-numbers) + shiftedState[indices[1]] = + std::conj(matrix[0b0001]) * v00 + + // NOLINTNEXTLINE(readability-magic-numbers) + std::conj(matrix[0b0101]) * v01 + + // NOLINTNEXTLINE(readability-magic-numbers) + std::conj(matrix[0b1001]) * v10 + + // NOLINTNEXTLINE(readability-magic-numbers) + std::conj(matrix[0b1101]) * v11; + // NOLINTNEXTLINE(readability-magic-numbers) + shiftedState[indices[2]] = + std::conj(matrix[0b0010]) * v00 + + // NOLINTNEXTLINE(readability-magic-numbers) + std::conj(matrix[0b0110]) * v01 + + // NOLINTNEXTLINE(readability-magic-numbers) + std::conj(matrix[0b1010]) * v10 + + // NOLINTNEXTLINE(readability-magic-numbers) + std::conj(matrix[0b1110]) * v11; + // NOLINTNEXTLINE(readability-magic-numbers) + shiftedState[indices[3]] = + std::conj(matrix[0b0011]) * v00 + + // NOLINTNEXTLINE(readability-magic-numbers) + std::conj(matrix[0b0111]) * v01 + + // NOLINTNEXTLINE(readability-magic-numbers) + std::conj(matrix[0b1011]) * v10 + + // 
NOLINTNEXTLINE(readability-magic-numbers) + std::conj(matrix[0b1111]) * v11; + } + } else { + for (const size_t &externalIndex : externalIndices) { + std::complex *shiftedState = arr + externalIndex; + + const std::complex v00 = shiftedState[indices[0]]; + const std::complex v01 = shiftedState[indices[1]]; + const std::complex v10 = shiftedState[indices[2]]; + const std::complex v11 = shiftedState[indices[3]]; + + // NOLINTNEXTLINE(readability-magic-numbers) + shiftedState[indices[0]] = + matrix[0b0000] * v00 + matrix[0b0001] * v01 + + // NOLINTNEXTLINE(readability-magic-numbers) + matrix[0b0010] * v10 + matrix[0b0011] * v11; + // NOLINTNEXTLINE(readability-magic-numbers) + shiftedState[indices[1]] = + matrix[0b0100] * v00 + matrix[0b0101] * v01 + + // NOLINTNEXTLINE(readability-magic-numbers) + matrix[0b0110] * v10 + matrix[0b0111] * v11; + // NOLINTNEXTLINE(readability-magic-numbers) + shiftedState[indices[2]] = + matrix[0b1000] * v00 + matrix[0b1001] * v01 + + // NOLINTNEXTLINE(readability-magic-numbers) + matrix[0b1010] * v10 + matrix[0b1011] * v11; + // NOLINTNEXTLINE(readability-magic-numbers) + shiftedState[indices[3]] = + matrix[0b1100] * v00 + matrix[0b1101] * v01 + + // NOLINTNEXTLINE(readability-magic-numbers) + matrix[0b1110] * v10 + matrix[0b1111] * v11; + } + } + } + /** * @brief Apply a given matrix directly to the statevector. * @@ -93,9 +240,10 @@ class GateImplementationsPI : public PauliGenerator { * @param inverse Indicate whether inverse should be taken. 
*/ template - static void applyMatrix(std::complex *arr, size_t num_qubits, - const std::complex *matrix, - const std::vector &wires, bool inverse) { + static void + applyMultiQubitOp(std::complex *arr, size_t num_qubits, + const std::complex *matrix, + const std::vector &wires, bool inverse) { const auto [indices, externalIndices] = GateIndices(wires, num_qubits); std::vector> v(indices.size()); @@ -134,27 +282,6 @@ class GateImplementationsPI : public PauliGenerator { } } - /** - * @brief Apply a given matrix directly to the statevector. - * - * @param arr Pointer to the statevector. - * @param num_qubits Number of qubits. - * @param matrix Perfect square matrix in row-major order. - * @param wires Wires the gate applies to. - * @param inverse Indicate whether inverse should be taken. - */ - template - static void applyMatrix(std::complex *arr, size_t num_qubits, - const std::vector> &matrix, - const std::vector &wires, bool inverse) { - if (matrix.size() != Util::exp2(2 * wires.size())) { - throw std::invalid_argument( - "The size of matrix does not match with the given " - "number of wires"); - } - applyMatrix(arr, num_qubits, matrix.data(), wires, inverse); - } - /* Single qubit operators */ template static void applyPauliX(std::complex *arr, size_t num_qubits, diff --git a/pennylane_lightning/src/simulator/CPUMemoryModel.hpp b/pennylane_lightning/src/simulator/CPUMemoryModel.hpp new file mode 100644 index 0000000000..97b60cf7f4 --- /dev/null +++ b/pennylane_lightning/src/simulator/CPUMemoryModel.hpp @@ -0,0 +1,89 @@ + +// Copyright 2022 Xanadu Quantum Technologies Inc. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +/** + * @file + * Define memory models for CPU + */ +#pragma once +#include "Macros.hpp" + +#include +#include + +namespace Pennylane { +enum class CPUMemoryModel : uint8_t { + Unaligned, + Aligned256, + Aligned512, + END, + BEGIN = Unaligned, +}; + +inline auto getMemoryModel(const void *ptr) -> CPUMemoryModel { + if ((reinterpret_cast(ptr) % 64) == 0) { + return CPUMemoryModel::Aligned512; + } + + if ((reinterpret_cast(ptr) % 32) == 0) { + return CPUMemoryModel::Aligned256; + } + + return CPUMemoryModel::Unaligned; +} + +constexpr inline auto bestCPUMemoryModel() -> CPUMemoryModel { + if constexpr (use_avx512f) { + return CPUMemoryModel::Aligned512; + } else if (use_avx2) { + return CPUMemoryModel::Aligned256; + } + return CPUMemoryModel::Unaligned; +} + +template +constexpr inline auto getAlignment(CPUMemoryModel memory_model) -> size_t { + switch (memory_model) { + case CPUMemoryModel::Unaligned: + return alignof(PrecisionT); + case CPUMemoryModel::Aligned256: + return 32U; + case CPUMemoryModel::Aligned512: + return 64U; + default: + break; + } + PL_UNREACHABLE; +} + +template +auto allocateMemory(CPUMemoryModel memory_model, size_t size) + // NOLINTNEXTLINE(modernize-avoid-c-arrays,hicpp-avoid-c-arrays) + -> std::unique_ptr { + switch (memory_model) { + case CPUMemoryModel::Unaligned: + // NOLINTNEXTLINE(modernize-avoid-c-arrays,hicpp-avoid-c-arrays) + return std::unique_ptr{new T[size]}; + case CPUMemoryModel::Aligned256: + // NOLINTNEXTLINE(modernize-avoid-c-arrays,hicpp-avoid-c-arrays) + return std::unique_ptr{new (std::align_val_t(32)) 
T[size]}; + case CPUMemoryModel::Aligned512: + // NOLINTNEXTLINE(modernize-avoid-c-arrays,hicpp-avoid-c-arrays) + return std::unique_ptr{new (std::align_val_t(64)) T[size]}; + default: + break; + } + PL_UNREACHABLE; +} +} // namespace Pennylane diff --git a/pennylane_lightning/src/simulator/DefaultKernelsForStateVector.hpp b/pennylane_lightning/src/simulator/DefaultKernelsForStateVector.hpp index 3259ad5861..5cb25cbd93 100644 --- a/pennylane_lightning/src/simulator/DefaultKernelsForStateVector.hpp +++ b/pennylane_lightning/src/simulator/DefaultKernelsForStateVector.hpp @@ -13,34 +13,99 @@ // limitations under the License. /** * @file + * Set/get Default kernels for statevector */ #include "DispatchKeys.hpp" #include "GateOperation.hpp" +#include "IntegerInterval.hpp" #include "KernelType.hpp" +#include "Util.hpp" #include #include +#include +#include namespace Pennylane { -inline auto larger_than(size_t size) { - return [=](size_t num_qubits) { return num_qubits > size; }; -} -inline auto larger_than_equal_to(size_t size) { - return [=](size_t num_qubits) { return num_qubits >= size; }; -} -inline auto less_than(size_t size) { - return [=](size_t num_qubits) { return num_qubits < size; }; -} -inline auto less_than_equal_to(size_t size) { - return [=](size_t num_qubits) { return num_qubits <= size; }; +///@cond DEV +struct DispatchElement { + uint32_t priority; + Util::IntegerInterval interval; + Gates::KernelType kernel; +}; + +inline bool lower_priority(const DispatchElement &lhs, + const DispatchElement &rhs) { + return lhs.priority < rhs.priority; } -inline auto in_between_closed(size_t l1, size_t l2) { - return [=](size_t num_qubits) { - return (l1 <= num_qubits) && (num_qubits <= l2); - }; + +inline bool higher_priority(const DispatchElement &lhs, + const DispatchElement &rhs) { + return lhs.priority > rhs.priority; } +/** + * @brief Maintain dispatch element using a vector decreasingly-ordered by + * priority. 
+ */ +class PriorityDispatchSet { + private: + std::vector ordered_vec_; + + public: + [[nodiscard]] bool + conflict(uint32_t test_priority, + const Util::IntegerInterval &test_interval) const { + const auto test_elt = DispatchElement{test_priority, test_interval, + Gates::KernelType::None}; + const auto [b, e] = + std::equal_range(ordered_vec_.begin(), ordered_vec_.end(), test_elt, + higher_priority); + for (auto iter = b; iter != e; ++iter) { + if (!is_disjoint(iter->interval, test_interval)) { + return true; + } + } + return false; + } + + void insert(const DispatchElement &elt) { + const auto iter_to_insert = std::upper_bound( + ordered_vec_.begin(), ordered_vec_.end(), elt, &higher_priority); + ordered_vec_.insert(iter_to_insert, elt); + } + + template void emplace(Ts &&...args) { + const auto elt = DispatchElement{std::forward(args)...}; + const auto iter_to_insert = std::upper_bound( + ordered_vec_.begin(), ordered_vec_.end(), elt, &higher_priority); + ordered_vec_.insert(iter_to_insert, elt); + } + + [[nodiscard]] Gates::KernelType getKernel(size_t num_qubits) const { + for (const auto &elt : ordered_vec_) { + if (elt.interval(num_qubits)) { + return elt.kernel; + } + } + throw std::range_error( + "Cannot find a kernel for the given number of qubits."); + } + + void clearPriority(uint32_t remove_priority) { + const auto begin = std::lower_bound( + ordered_vec_.begin(), ordered_vec_.end(), remove_priority, + [](const auto &elt, uint32_t p) { return elt.priority > p; }); + const auto end = std::upper_bound( + ordered_vec_.begin(), ordered_vec_.end(), remove_priority, + [](uint32_t p, const auto &elt) { return p > elt.priority; }); + ordered_vec_.erase(begin, end); + } +}; + +///@endcond + class DefaultKernelsForStateVector { private: const static inline std::unordered_map, - Gates::KernelType>>> + std::pair, + PriorityDispatchSet, Util::PairHash> gate_kernel_map_; std::unordered_map< - Gates::GeneratorOperation, - std::vector, - Gates::KernelType>>> + 
std::pair, + PriorityDispatchSet, Util::PairHash> generator_kernel_map_; + std::unordered_map< + std::pair, + PriorityDispatchSet, Util::PairHash> + matrix_kernel_map_; + void registerDefaultGates() { using Gates::GateOperation; + using Util::full_domain; + using Util::in_between_closed; + using Util::larger_than; + using Util::larger_than_equal_to; + using Util::less_than; + using Util::less_than_equal_to; + auto &instance = *this; - auto all_qubit_numbers = []([[maybe_unused]] size_t num_qubits) { - return true; - }; + auto all_qubit_numbers = full_domain(); /* Single-qubit gates */ instance.assignKernelForGate(GateOperation::PauliX, all_threading, all_memory_model, all_qubit_numbers, @@ -126,15 +199,15 @@ class DefaultKernelsForStateVector { instance.assignKernelForGate( GateOperation::IsingXX, all_threading, // NOLINTNEXTLINE(readability-magic-numbers) - all_memory_model, less_than(12), Gates::KernelType::LM); + all_memory_model, less_than(12), Gates::KernelType::LM); instance.assignKernelForGate( GateOperation::IsingXX, all_threading, all_memory_model, // NOLINTNEXTLINE(readability-magic-numbers) - in_between_closed(12, 20), Gates::KernelType::PI); + in_between_closed(12, 20), Gates::KernelType::PI); instance.assignKernelForGate( GateOperation::IsingXX, all_threading, // NOLINTNEXTLINE(readability-magic-numbers) - all_memory_model, larger_than(20), Gates::KernelType::LM); + all_memory_model, larger_than(20), Gates::KernelType::LM); instance.assignKernelForGate(GateOperation::IsingYY, all_threading, all_memory_model, all_qubit_numbers, @@ -166,12 +239,18 @@ class DefaultKernelsForStateVector { } void registerDefaultGenerators() { + using Gates::GateOperation; using Gates::GeneratorOperation; using Gates::KernelType; + using Util::full_domain; + using Util::in_between_closed; + using Util::larger_than; + using Util::larger_than_equal_to; + using Util::less_than; + using Util::less_than_equal_to; + auto &instance = *this; - auto all_qubit_numbers = 
[]([[maybe_unused]] size_t num_qubits) { - return true; - }; + auto all_qubit_numbers = full_domain(); instance.assignKernelForGenerator(GeneratorOperation::PhaseShift, all_threading, all_memory_model, @@ -211,9 +290,35 @@ class DefaultKernelsForStateVector { all_qubit_numbers, KernelType::LM); } + void registerDefaultMatrices() { + using Gates::GateOperation; + using Gates::KernelType; + using Gates::MatrixOperation; + using Util::full_domain; + using Util::in_between_closed; + using Util::larger_than; + using Util::larger_than_equal_to; + using Util::less_than; + using Util::less_than_equal_to; + + auto &instance = *this; + auto all_qubit_numbers = full_domain(); + + instance.assignKernelForMatrix(MatrixOperation::SingleQubitOp, + all_threading, all_memory_model, + all_qubit_numbers, KernelType::LM); + instance.assignKernelForMatrix(MatrixOperation::TwoQubitOp, + all_threading, all_memory_model, + all_qubit_numbers, KernelType::LM); + instance.assignKernelForMatrix(MatrixOperation::MultiQubitOp, + all_threading, all_memory_model, + all_qubit_numbers, KernelType::LM); + } + DefaultKernelsForStateVector() { registerDefaultGates(); registerDefaultGenerators(); + registerDefaultMatrices(); } public: @@ -230,108 +335,197 @@ class DefaultKernelsForStateVector { return instance; } - void - assignKernelForGate(Gates::GateOperation gate_op, Threading threading, - CPUMemoryModel memory_model, - const std::function &num_qubits_criterion, - Gates::KernelType kernel) { + void assignKernelForGate(Gates::GateOperation gate_op, Threading threading, + CPUMemoryModel memory_model, uint32_t priority, + const Util::IntegerInterval &interval, + Gates::KernelType kernel) { + if (std::find(allowed_kernels.at(memory_model).cbegin(), + allowed_kernels.at(memory_model).cend(), + kernel) == allowed_kernels.at(memory_model).cend()) { + throw std::invalid_argument("The given kernel is now allowed for " + "the given memory model."); + } + const auto dispatch_key = toDispatchKey(threading, 
memory_model); + auto &set = gate_kernel_map_[std::make_pair(gate_op, dispatch_key)]; + + if (set.conflict(priority, interval)) { + throw std::invalid_argument("The given interval conflicts with " + "existing intervals."); + } + set.emplace(priority, interval, kernel); + } + + void assignKernelForGate(Gates::GateOperation gate_op, + [[maybe_unused]] AllThreading dummy, + CPUMemoryModel memory_model, + const Util::IntegerInterval &interval, + Gates::KernelType kernel) { + /* Priority for all threading is 1 */ + Util::for_each_enum([=](Threading threading) { + assignKernelForGate(gate_op, threading, memory_model, 1, interval, + kernel); + }); + } + + void assignKernelForGate(Gates::GateOperation gate_op, Threading threading, + [[maybe_unused]] AllMemoryModel dummy, + const Util::IntegerInterval &interval, + Gates::KernelType kernel) { + /* Priority for all memory model is 2 */ + Util::for_each_enum([=](CPUMemoryModel memory_model) { + assignKernelForGate(gate_op, threading, memory_model, 2, interval, + kernel); + }); + } + + void assignKernelForGate(Gates::GateOperation gate_op, + [[maybe_unused]] AllThreading dummy1, + [[maybe_unused]] AllMemoryModel dummy2, + const Util::IntegerInterval &interval, + Gates::KernelType kernel) { + /* Priority is 0 */ + Util::for_each_enum( + [=](Threading threading, CPUMemoryModel memory_model) { + assignKernelForGate(gate_op, threading, memory_model, 0, + interval, kernel); + }); + } + + void assignKernelForGenerator(Gates::GeneratorOperation gntr_op, + Threading threading, + CPUMemoryModel memory_model, + uint32_t priority, + const Util::IntegerInterval &interval, + Gates::KernelType kernel) { if (std::find(allowed_kernels.at(memory_model).cbegin(), allowed_kernels.at(memory_model).cend(), kernel) == allowed_kernels.at(memory_model).cend()) { throw std::invalid_argument("The given kernel is now allowed for " "the given memory model."); } - gate_kernel_map_[gate_op].emplace_back( - toDispatchKey(threading, memory_model), 
num_qubits_criterion, - kernel); + const auto dispatch_key = toDispatchKey(threading, memory_model); + auto &set = + generator_kernel_map_[std::make_pair(gntr_op, dispatch_key)]; + + if (set.conflict(priority, interval)) { + throw std::invalid_argument("The given interval conflicts with " + "existing intervals."); + } + set.emplace(priority, interval, kernel); } - void - assignKernelForGate(Gates::GateOperation gate_op, - [[maybe_unused]] AllThreading dummy, - CPUMemoryModel memory_model, - const std::function &num_qubits_criterion, - Gates::KernelType kernel) { + void assignKernelForGenerator(Gates::GeneratorOperation gntr_op, + [[maybe_unused]] AllThreading dummy, + CPUMemoryModel memory_model, + const Util::IntegerInterval &interval, + Gates::KernelType kernel) { Util::for_each_enum([=](Threading threading) { - assignKernelForGate(gate_op, threading, memory_model, - num_qubits_criterion, kernel); + assignKernelForGenerator(gntr_op, threading, memory_model, 1, + interval, kernel); }); } - void - assignKernelForGate(Gates::GateOperation gate_op, Threading threading, - [[maybe_unused]] AllMemoryModel dummy, - const std::function &num_qubits_criterion, - Gates::KernelType kernel) { + void assignKernelForGenerator(Gates::GeneratorOperation gntr_op, + Threading threading, + [[maybe_unused]] AllMemoryModel dummy, + const Util::IntegerInterval &interval, + Gates::KernelType kernel) { Util::for_each_enum([=](CPUMemoryModel memory_model) { - assignKernelForGate(gate_op, threading, memory_model, - num_qubits_criterion, kernel); + assignKernelForGenerator(gntr_op, threading, memory_model, 2, + interval, kernel); }); } - void - assignKernelForGate(Gates::GateOperation gate_op, - [[maybe_unused]] AllThreading dummy1, - [[maybe_unused]] AllMemoryModel dummy2, - const std::function &num_qubits_criterion, - Gates::KernelType kernel) { + void assignKernelForGenerator(Gates::GeneratorOperation gntr_op, + [[maybe_unused]] AllThreading dummy1, + [[maybe_unused]] AllMemoryModel 
dummy2, + const Util::IntegerInterval &interval, + Gates::KernelType kernel) { Util::for_each_enum( [=](Threading threading, CPUMemoryModel memory_model) { - assignKernelForGate(gate_op, threading, memory_model, - num_qubits_criterion, kernel); + assignKernelForGenerator(gntr_op, threading, memory_model, 0, + interval, kernel); }); } - void assignKernelForGenerator( - Gates::GeneratorOperation gntr_op, Threading threading, - CPUMemoryModel memory_model, - const std::function &num_qubits_criterion, - Gates::KernelType kernel) { + void assignKernelForMatrix(Gates::MatrixOperation mat_op, + Threading threading, CPUMemoryModel memory_model, + uint32_t priority, + const Util::IntegerInterval &interval, + Gates::KernelType kernel) { if (std::find(allowed_kernels.at(memory_model).cbegin(), allowed_kernels.at(memory_model).cend(), kernel) == allowed_kernels.at(memory_model).cend()) { throw std::invalid_argument("The given kernel is now allowed for " "the given memory model."); } - generator_kernel_map_[gntr_op].emplace_back( - toDispatchKey(threading, memory_model), num_qubits_criterion, - kernel); + const auto dispatch_key = toDispatchKey(threading, memory_model); + auto &set = matrix_kernel_map_[std::make_pair(mat_op, dispatch_key)]; + + if (set.conflict(priority, interval)) { + throw std::invalid_argument("The given interval conflicts with " + "existing intervals."); + } + set.emplace(priority, interval, kernel); } - void assignKernelForGenerator( - Gates::GeneratorOperation gntr_op, [[maybe_unused]] AllThreading dummy, - CPUMemoryModel memory_model, - const std::function &num_qubits_criterion, - Gates::KernelType kernel) { + void assignKernelForMatrix(Gates::MatrixOperation mat_op, + [[maybe_unused]] AllThreading dummy, + CPUMemoryModel memory_model, + const Util::IntegerInterval &interval, + Gates::KernelType kernel) { Util::for_each_enum([=](Threading threading) { - assignKernelForGenerator(gntr_op, threading, memory_model, - num_qubits_criterion, kernel); + 
assignKernelForMatrix(mat_op, threading, memory_model, 1, interval, + kernel); }); } - void assignKernelForGenerator( - Gates::GeneratorOperation gntr_op, Threading threading, - [[maybe_unused]] AllMemoryModel dummy, - const std::function &num_qubits_criterion, - Gates::KernelType kernel) { + void assignKernelForMatrix(Gates::MatrixOperation mat_op, + Threading threading, + [[maybe_unused]] AllMemoryModel dummy, + const Util::IntegerInterval &interval, + Gates::KernelType kernel) { Util::for_each_enum([=](CPUMemoryModel memory_model) { - assignKernelForGenerator(gntr_op, threading, memory_model, - num_qubits_criterion, kernel); + assignKernelForMatrix(mat_op, threading, memory_model, 2, interval, + kernel); }); } - void assignKernelForGenerator( - Gates::GeneratorOperation gntr_op, [[maybe_unused]] AllThreading dummy1, - [[maybe_unused]] AllMemoryModel dummy2, - const std::function &num_qubits_criterion, - Gates::KernelType kernel) { + void assignKernelForMatrix(Gates::MatrixOperation mat_op, + [[maybe_unused]] AllThreading dummy1, + [[maybe_unused]] AllMemoryModel dummy2, + const Util::IntegerInterval &interval, + Gates::KernelType kernel) { Util::for_each_enum( [=](Threading threading, CPUMemoryModel memory_model) { - assignKernelForGenerator(gntr_op, threading, memory_model, - num_qubits_criterion, kernel); + assignKernelForMatrix(mat_op, threading, memory_model, 0, + interval, kernel); }); } + /** + * @brief Create default kernels for all gates + * @param num_qubits Number of qubits + * @param threading Threading context + * @param memory_model Memory model of the underlying data + */ + auto getGateKernelMap(size_t num_qubits, Threading threading, + CPUMemoryModel memory_model) const + -> std::unordered_map { + uint32_t dispatch_key = toDispatchKey(threading, memory_model); + + std::unordered_map + kernel_for_gates; + + Util::for_each_enum( + [&](Gates::GateOperation gate_op) { + const auto key = std::make_pair(gate_op, dispatch_key); + const auto &set = 
gate_kernel_map_.at(key); + kernel_for_gates.emplace(gate_op, set.getKernel(num_qubits)); + }); + return kernel_for_gates; + } + /** * @brief Create default kernels for all generators * @param num_qubits Number of qubits @@ -346,58 +540,54 @@ class DefaultKernelsForStateVector { std::unordered_map kernel_for_generators; - for (auto generator = Gates::GeneratorOperation::BEGIN; - generator != Gates::GeneratorOperation::END; - generator = static_cast( - static_cast(generator) + 1)) { - - const auto iter = - std::find_if(generator_kernel_map_.at(generator).cbegin(), - generator_kernel_map_.at(generator).cend(), - [dispatch_key = dispatch_key, - num_qubits = num_qubits](const auto &t) { - return (std::get<0>(t) == dispatch_key && - std::get<1>(t)(num_qubits)); - }); - if (iter == generator_kernel_map_.at(generator).cend()) { - throw std::range_error("Cannot find registered kernel for a " - "dispatch key and number of qubits."); - } - kernel_for_generators.emplace(generator, std::get<2>(*iter)); - } + Util::for_each_enum( + [&](Gates::GeneratorOperation gntr_op) { + const auto key = std::make_pair(gntr_op, dispatch_key); + const auto &set = generator_kernel_map_.at(key); + kernel_for_generators.emplace(gntr_op, + set.getKernel(num_qubits)); + }); return kernel_for_generators; } - auto getGateKernelMap(size_t num_qubits, Threading threading, - CPUMemoryModel memory_model) const - -> std::unordered_map { + /** + * @brief Create default kernels for all matrix operations + * @param num_qubits Number of qubits + * @param threading Threading context + * @param memory_model Memory model of the underlying data + */ + auto getMatrixKernelMap(size_t num_qubits, Threading threading, + CPUMemoryModel memory_model) const + -> std::unordered_map { uint32_t dispatch_key = toDispatchKey(threading, memory_model); - std::unordered_map - kernel_for_gates; + std::unordered_map + kernel_for_matrices; - for (auto gate = Gates::GateOperation::BEGIN; - gate != Gates::GateOperation::END; - gate 
= static_cast( - static_cast(gate) + 1)) { + Util::for_each_enum( + [&](Gates::MatrixOperation mat_op) { + const auto key = std::make_pair(mat_op, dispatch_key); + const auto &set = matrix_kernel_map_.at(key); + kernel_for_matrices.emplace(mat_op, set.getKernel(num_qubits)); + }); + return kernel_for_matrices; + } - if (gate == Gates::GateOperation::Matrix) { - continue; - } + void removeKernelForGenerator(Gates::GateOperation gate_op, + Threading threading, + CPUMemoryModel memory_model, + uint32_t priority) { + uint32_t dispatch_key = toDispatchKey(threading, memory_model); + gate_kernel_map_[std::make_pair(gate_op, dispatch_key)].clearPriority( + priority); + } - const auto iter = std::find_if( - gate_kernel_map_.at(gate).cbegin(), - gate_kernel_map_.at(gate).cend(), [=](const auto &t) { - return (std::get<0>(t) == dispatch_key && - std::get<1>(t)(num_qubits)); - }); - if (iter == gate_kernel_map_.at(gate).cend()) { - throw std::range_error("Cannot find registered kernel for a " - "dispatch key and number of qubits."); - } - kernel_for_gates.emplace(gate, std::get<2>(*iter)); - } - return kernel_for_gates; + void removeKernelForMatrix(Gates::MatrixOperation mat_op, + Threading threading, CPUMemoryModel memory_model, + uint32_t priority) { + uint32_t dispatch_key = toDispatchKey(threading, memory_model); + matrix_kernel_map_[std::make_pair(mat_op, dispatch_key)].clearPriority( + priority); } }; } // namespace Pennylane diff --git a/pennylane_lightning/src/simulator/DispatchKeys.hpp b/pennylane_lightning/src/simulator/DispatchKeys.hpp index 08265c9c59..a6d2f4ba94 100644 --- a/pennylane_lightning/src/simulator/DispatchKeys.hpp +++ b/pennylane_lightning/src/simulator/DispatchKeys.hpp @@ -17,6 +17,7 @@ */ #pragma once +#include "CPUMemoryModel.hpp" #include "Macros.hpp" #include @@ -33,14 +34,6 @@ enum class Threading : uint8_t { BEGIN = SingleThread, }; -enum class CPUMemoryModel : uint8_t { - Unaligned, - Aligned256, - Aligned512, - END, - BEGIN = Unaligned, -}; - 
constexpr uint32_t toDispatchKey(Threading threading, CPUMemoryModel memory_model) { /* Threading is in higher priority */ @@ -48,18 +41,6 @@ constexpr uint32_t toDispatchKey(Threading threading, static_cast(memory_model); } -inline auto getMemoryModel(const void *ptr) -> CPUMemoryModel { - if ((reinterpret_cast(ptr) % 64) == 0) { - return CPUMemoryModel::Aligned512; - } - - if ((reinterpret_cast(ptr) % 32) == 0) { - return CPUMemoryModel::Aligned256; - } - - return CPUMemoryModel::Unaligned; -} - /** * @brief Choose the best threading based on the current context. */ @@ -75,13 +56,4 @@ inline auto bestThreading() -> Threading { return Threading::SingleThread; } -constexpr inline auto bestCPUMemoryModel() -> CPUMemoryModel { - if constexpr (use_avx512f) { - return CPUMemoryModel::Aligned512; - } else if (use_avx2) { - return CPUMemoryModel::Aligned256; - } - return CPUMemoryModel::Unaligned; -} - } // namespace Pennylane diff --git a/pennylane_lightning/src/simulator/DynamicDispatcher.cpp b/pennylane_lightning/src/simulator/DynamicDispatcher.cpp index 22187d4fcf..4d4851d3d6 100644 --- a/pennylane_lightning/src/simulator/DynamicDispatcher.cpp +++ b/pennylane_lightning/src/simulator/DynamicDispatcher.cpp @@ -71,19 +71,11 @@ constexpr auto constructGateOpsFunctorTupleIter() { } else if (gate_idx < GateImplementation::implemented_gates.size()) { constexpr auto gate_op = GateImplementation::implemented_gates[gate_idx]; - if constexpr (gate_op == Gates::GateOperation::Matrix) { - /* GateOperation::Matrix is not supported for dynamic dispatch now - */ - return constructGateOpsFunctorTupleIter< - PrecisionT, ParamT, GateImplementation, gate_idx + 1>(); - } else { - return prepend_to_tuple( - std::pair{gate_op, - gateOpToFunctor()}, - constructGateOpsFunctorTupleIter< - PrecisionT, ParamT, GateImplementation, gate_idx + 1>()); - } + return prepend_to_tuple( + std::pair{gate_op, gateOpToFunctor()}, + constructGateOpsFunctorTupleIter< + PrecisionT, ParamT, GateImplementation, 
gate_idx + 1>()); } } /** @@ -105,6 +97,25 @@ constexpr auto constructGeneratorOpsFunctorTupleIter() { PrecisionT, GateImplementation, gntr_idx + 1>()); } } +/** + * @brief Internal recursion function for constructMatrixOpsFunctorTuple + */ +template +constexpr auto constructMatrixOpsFunctorTupleIter() { + if constexpr (mat_idx == GateImplementation::implemented_matrices.size()) { + return std::tuple{}; + } else if (mat_idx < GateImplementation::implemented_matrices.size()) { + constexpr auto mat_op = + GateImplementation::implemented_matrices[mat_idx]; + return prepend_to_tuple( + std::pair{ + mat_op, + Gates::MatrixOpToMemberFuncPtr::value}, + constructMatrixOpsFunctorTupleIter()); + } +} /// @endcond /** @@ -122,13 +133,22 @@ constexpr auto gate_op_functor_tuple = constructGateOpsFunctorTupleIter< * @brief Tuple of gate operation and function pointer pairs. * * @tparam PrecisionT Floating point precision of underlying statevector data - * @tparam ParamT Floating point type of gate parameters * @tparam GateImplementation Gate implementation class. */ template constexpr auto generator_op_functor_tuple = constructGeneratorOpsFunctorTupleIter(); +/** + * @brief Tuple of matrix operation and function pointer pairs + * + * @tparam PrecisionT Floating point precision of underlying statevector data + * @tparam GateImplementation Gate implementation class. + */ +template +constexpr auto matrix_op_functor_tuple = + constructMatrixOpsFunctorTupleIter(); + /** * @brief Register all implemented gates for a given kernel * @@ -172,12 +192,36 @@ void registerAllImplementedGeneratorOps() { return gntr_op; }; - [[maybe_unused]] const auto registerd_gate_ops = std::apply( + [[maybe_unused]] const auto registerd_gntr_ops = std::apply( [&registerGeneratorToDispatcher](auto...
elt) { return std::make_tuple(registerGeneratorToDispatcher(elt)...); }, generator_op_functor_tuple); } +/** + * @brief Register all implemented matrix operations for a given kernel + * + * @tparam PrecisionT Floating point precision of underlying statevector data + * @tparam GateImplementation Gate implementation class. + */ +template +void registerAllImplementedMatrixOps() { + auto &dispatcher = DynamicDispatcher::getInstance(); + + auto registerMatrixToDispatcher = [&dispatcher]( + const auto &mat_op_func_pair) { + const auto &[mat_op, func] = mat_op_func_pair; + dispatcher.registerMatrixOperation(mat_op, + GateImplementation::kernel_id, func); + return mat_op; + }; + + [[maybe_unused]] const auto registerd_mat_ops = std::apply( + [&registerMatrixToDispatcher](auto... elt) { + return std::make_tuple(registerMatrixToDispatcher(elt)...); + }, + matrix_op_functor_tuple); +} /// @cond DEV /** @@ -193,6 +237,7 @@ void registerKernelIter() { typename TypeList::Type>(); registerAllImplementedGeneratorOps(); + registerAllImplementedMatrixOps(); registerKernelIter(); } } diff --git a/pennylane_lightning/src/simulator/DynamicDispatcher.hpp b/pennylane_lightning/src/simulator/DynamicDispatcher.hpp index 284e221002..46fc68ab81 100644 --- a/pennylane_lightning/src/simulator/DynamicDispatcher.hpp +++ b/pennylane_lightning/src/simulator/DynamicDispatcher.hpp @@ -24,6 +24,9 @@ #include "Error.hpp" #include "GateUtil.hpp" #include "KernelType.hpp" +#include "Macros.hpp" +#include "OpToMemberFuncPtr.hpp" +#include "Util.hpp" #include #include @@ -35,12 +38,6 @@ /// @cond DEV namespace Pennylane::Internal { -struct PairHash { - template - size_t operator()(const std::pair &p) const { - return std::hash()(p.first) ^ std::hash()(p.second); - } -}; /** * @brief Register all implemented gates for all available kernels.
* @@ -82,23 +79,27 @@ template class DynamicDispatcher { const std::vector & /*wires*/, bool /*inverse*/, const std::vector & /*params*/)>; - using GeneratorFunc = PrecisionT (*)(std::complex * /*data*/, - size_t /*num_qubits*/, - const std::vector & /*wires*/, - bool /*adjoint*/); + using GeneratorFunc = Gates::GeneratorFuncPtrT; + using MatrixFunc = std::function *, size_t, + const std::complex *, + const std::vector &, bool)>; private: std::unordered_map str_to_gates_; std::unordered_map str_to_gntrs_; std::unordered_map, - GateFunc, Internal::PairHash> + GateFunc, Util::PairHash> gates_; std::unordered_map, - GeneratorFunc, Internal::PairHash> + GeneratorFunc, Util::PairHash> generators_; + std::unordered_map, + MatrixFunc, Util::PairHash> + matrices_; + constexpr static auto removeGeneratorPrefix(std::string_view op_name) -> std::string_view { constexpr std::string_view prefix = "Generator"; @@ -161,6 +162,18 @@ template class DynamicDispatcher { std::forward(func)); } + /** + * @brief Register a new matrix operation. Can pass a custom + * kernel + */ + // template + void registerMatrixOperation(Gates::MatrixOperation mat_op, + Gates::KernelType kernel, MatrixFunc func) { + // FunctionType&& func) { + // TODO: Add mutex when we go to multithreading + matrices_.emplace(std::make_pair(mat_op, kernel), func); + } + /** * @brief Apply a single gate to the state-vector using the given kernel. 
* @@ -180,7 +193,8 @@ template class DynamicDispatcher { gates_.find(std::make_pair(strToGateOp(op_name), kernel)); if (iter == gates_.cend()) { throw std::invalid_argument( - "Cannot find a gate with a given name \"" + op_name + "\"."); + "The gate " + op_name + + " is not registered for the given kernel"); } (iter->second)(data, num_qubits, wires, inverse, params); } @@ -203,10 +217,10 @@ template class DynamicDispatcher { const auto iter = gates_.find(std::make_pair(gate_op, kernel)); if (iter == gates_.cend()) { throw std::invalid_argument( - std::string("Cannot find a gate with a given name \"") + + std::string("The gate ") + std::string( Util::lookup(Gates::Constant::gate_names, gate_op)) + - "\"."); + " is not registered for the given kernel"); } (iter->second)(data, num_qubits, wires, inverse, params); } @@ -221,11 +235,12 @@ template class DynamicDispatcher { * @param inverse List of inverses * @param params List of parameters */ - void applyOperations(CFP_t *data, size_t num_qubits, - const std::vector &ops, - const std::vector> &wires, - const std::vector &inverse, - const std::vector> &params) { + void + applyOperations(CFP_t *data, size_t num_qubits, + const std::vector &ops, + const std::vector> &wires, + const std::vector &inverse, + const std::vector> &params) const { const size_t numOperations = ops.size(); if (numOperations != wires.size() || numOperations != params.size()) { throw std::invalid_argument( @@ -252,7 +267,7 @@ template class DynamicDispatcher { void applyOperations(CFP_t *data, size_t num_qubits, const std::vector &ops, const std::vector> &wires, - const std::vector &inverse) { + const std::vector &inverse) const { const size_t numOperations = ops.size(); if (numOperations != wires.size()) { throw std::invalid_argument( @@ -265,6 +280,86 @@ template class DynamicDispatcher { } } + /** + * @brief Apply a given matrix directly to the statevector. + * + * @param arr Pointer to the statevector. 
+ * @param num_qubits Number of qubits.
+ * @param matrix Perfect square matrix in row-major order. + * @param wires Wires the gate applies to. + * @param inverse Indicate whether inverse should be taken. + */ + void applyMatrix(Gates::KernelType kernel, CFP_t *data, + Gates::MatrixOperation mat_op, size_t num_qubits, + const std::complex *matrix, + const std::vector &wires, bool inverse) const { + assert(num_qubits >= wires.size()); + + switch (mat_op) { + case Gates::MatrixOperation::SingleQubitOp: + assert(wires.size() == 1); + break; + case Gates::MatrixOperation::TwoQubitOp: + assert(wires.size() == 2); + break; + default: + break; + } + const auto iter = matrices_.find(std::make_pair(mat_op, kernel)); + if (iter == matrices_.end()) { + throw std::invalid_argument( + std::string( + Util::lookup(Gates::Constant::matrix_names, mat_op)) + + " is not registered for the given kernel"); + } + (iter->second)(data, num_qubits, matrix, wires, inverse); + } + + /** + * @brief Apply a given matrix directly to the statevector. + * + * @param arr Pointer to the statevector. + * @param num_qubits Number of qubits. + * @param matrix Perfect square matrix in row-major order. + * @param wires Wires the gate applies to. + * @param inverse Indicate whether inverse should be taken. + */ + void applyMatrix(Gates::KernelType kernel, CFP_t *data, + Gates::MatrixOperation mat_op, size_t num_qubits, + const std::complex &matrix, + const std::vector &wires, bool inverse) const { + if (matrix.size() != Util::exp2(2 * wires.size())) { + throw std::invalid_argument( + "The size of matrix does not match with the given " + "number of wires"); + } + applyMatrix(kernel, data, num_qubits, matrix.data(), wires, inverse); + } + + /** + * @brief Apply a single generator to the state-vector using the given + * kernel. + * + * @param kernel Kernel to run the gate operation. + * @param data Pointer to data. + * @param num_qubits Number of qubits. + * @param op_name Gate operation name. + * @param wires Wires to apply gate to. 
+ * @param adj Indicates whether to use adjoint of gate. + */ + auto applyGenerator(Gates::KernelType kernel, CFP_t *data, + size_t num_qubits, Gates::GeneratorOperation gntr_op, + const std::vector &wires, bool adj) const + -> PrecisionT { + using Gates::Constant::generator_names; + const auto iter = generators_.find(std::make_pair(gntr_op, kernel)); + if (iter == generators_.cend()) { + throw std::invalid_argument( + "Cannot find a gate with a given name \"" + + std::string(Util::lookup(generator_names, gntr_op)) + "\"."); + } + return (iter->second)(data, num_qubits, wires, adj); + } /** * @brief Apply a single generator to the state-vector using the given * kernel. diff --git a/pennylane_lightning/src/simulator/StateVectorBase.hpp b/pennylane_lightning/src/simulator/StateVectorBase.hpp index e2b3ac32e8..9861ec0c39 100644 --- a/pennylane_lightning/src/simulator/StateVectorBase.hpp +++ b/pennylane_lightning/src/simulator/StateVectorBase.hpp @@ -98,11 +98,12 @@ namespace Pennylane { * @tparam PrecisionT Floating point precision of underlying statevector data. * @tparam Derived Type of a derived class */ -template class StateVectorBase { +template class StateVectorBase { public: /** * @brief StateVector complex precision type. */ + using PrecisionT = T; using ComplexPrecisionT = std::complex; private: @@ -153,6 +154,12 @@ template class StateVectorBase { gntr_op); } + [[nodiscard]] inline auto + getKernelForMatrix(Gates::MatrixOperation mat_op) const + -> Gates::KernelType { + return static_cast(this)->getKernelForMatrix(mat_op); + } + /** * @brief Compare two statevectors. * @@ -288,31 +295,6 @@ template class StateVectorBase { num_qubits_, opName, wires, adj); } - /** - * @brief Apply a given matrix directly to the statevector read directly - * from numpy data. Data can be in 1D or 2D format. - * - * @param matrix Pointer to the array data. - * @param wires Wires the gate applies to. - * @param inverse Indicate whether inverse should be taken. 
- */ - template - inline void applyMatrix_(const ComplexPrecisionT *matrix, - const std::vector &wires, - bool inverse = false) { - auto *arr = getData(); - Gates::SelectKernel::applyMatrix(arr, num_qubits_, matrix, - wires, inverse); - } - template - inline void applyMatrix_(const std::vector &matrix, - const std::vector &wires, - bool inverse = false) { - auto *arr = getData(); - Gates::SelectKernel::applyMatrix(arr, num_qubits_, matrix, - wires, inverse); - } - /** * @brief Apply a given matrix directly to the statevector read directly * from numpy data. Data can be in 1D or 2D format. @@ -325,33 +307,52 @@ template class StateVectorBase { const std::vector &wires, bool inverse = false) { namespace Constant = Gates::Constant; - using Gates::GateOperation; + using Gates::MatrixOperation; using Gates::SelectKernel; using Gates::static_lookup; - constexpr auto kernel = static_lookup( - Constant::default_kernel_for_gates); - static_assert( - Util::array_has_elt(SelectKernel::implemented_gates, - GateOperation::Matrix), - "The default kernel for applyMatrix does not implement it."); - applyMatrix_(matrix, wires, inverse); + auto &dispatcher = DynamicDispatcher::getInstance(); + auto *arr = getData(); + + if (wires.empty()) { + throw std::invalid_argument( + "Number of wires must be larger than 0"); + } + + switch (wires.size()) { + case 1: + dispatcher.applyMatrix( + getKernelForMatrix(MatrixOperation::SingleQubitOp), arr, + MatrixOperation::SingleQubitOp, num_qubits_, matrix, wires, + inverse); + return; + case 2: + dispatcher.applyMatrix( + getKernelForMatrix(MatrixOperation::TwoQubitOp), arr, + MatrixOperation::TwoQubitOp, num_qubits_, matrix, wires, + inverse); + return; + default: + dispatcher.applyMatrix( + getKernelForMatrix(MatrixOperation::MultiQubitOp), arr, + MatrixOperation::MultiQubitOp, num_qubits_, matrix, wires, + inverse); + return; + } + PL_UNREACHABLE; } - inline void applyMatrix(const std::vector &matrix, + + template + inline void 
applyMatrix(const std::vector &matrix, const std::vector &wires, bool inverse = false) { - namespace Constant = Gates::Constant; - using Gates::GateOperation; - using Gates::SelectKernel; - using Gates::static_lookup; + if (matrix.size() != Util::exp2(2 * wires.size())) { + throw std::invalid_argument( + "The size of matrix does not match with the given " + "number of wires"); + } - constexpr auto kernel = static_lookup( - Constant::default_kernel_for_gates); - static_assert( - Util::array_has_elt(SelectKernel::implemented_gates, - GateOperation::Matrix), - "The default kernel for applyMatrix does not implement it."); - applyMatrix_(matrix, wires, inverse); + applyMatrix(matrix.data(), wires, inverse); } /** diff --git a/pennylane_lightning/src/simulator/StateVectorCPU.hpp b/pennylane_lightning/src/simulator/StateVectorCPU.hpp index 89ff8d40a4..bb08bf30e1 100644 --- a/pennylane_lightning/src/simulator/StateVectorCPU.hpp +++ b/pennylane_lightning/src/simulator/StateVectorCPU.hpp @@ -32,16 +32,19 @@ class StateVectorCPU : public StateVectorBase { public: using ComplexPrecisionT = std::complex; + protected: + const Threading threading_; + const CPUMemoryModel memory_model_; + private: using BaseType = StateVectorBase; - Threading threading_; - CPUMemoryModel memory_model_; - std::unordered_map kernel_for_gates_; std::unordered_map kernel_for_generators_; + std::unordered_map + kernel_for_matrices_; void setKernels(size_t num_qubits, Threading threading, CPUMemoryModel memory_model) { @@ -50,6 +53,8 @@ class StateVectorCPU : public StateVectorBase { num_qubits, threading, memory_model); kernel_for_generators_ = default_kernels.getGeneratorKernelMap( num_qubits, threading, memory_model); + kernel_for_matrices_ = default_kernels.getMatrixKernelMap( + num_qubits, threading, memory_model); } protected: @@ -71,6 +76,14 @@ class StateVectorCPU : public StateVectorBase { -> Gates::KernelType { return kernel_for_generators_.at(gntr_op); } -}; + [[nodiscard]] inline auto + 
getKernelForMatrix(Gates::MatrixOperation mat_op) const + -> Gates::KernelType { + return kernel_for_matrices_.at(mat_op); + } + + inline CPUMemoryModel memoryModel() const { return memory_model_; } + inline Threading threading() const { return threading_; } +}; } // namespace Pennylane diff --git a/pennylane_lightning/src/simulator/StateVectorManagedCPU.hpp b/pennylane_lightning/src/simulator/StateVectorManagedCPU.hpp index b36aac7f6d..599cb9b91b 100644 --- a/pennylane_lightning/src/simulator/StateVectorManagedCPU.hpp +++ b/pennylane_lightning/src/simulator/StateVectorManagedCPU.hpp @@ -11,6 +11,7 @@ #pragma once #include "BitUtil.hpp" +#include "CPUMemoryModel.hpp" #include "DispatchKeys.hpp" #include "Gates.hpp" #include "KernelType.hpp" @@ -49,9 +50,7 @@ class StateVectorManagedCPU : BaseType{num_qubits, threading, memory_model} { size_t length = BaseType::getLength(); - // NOLINTNEXTLINE(modernize-avoid-c-arrays,hicpp-avoid-c-arrays) - data_ = std::unique_ptr{ - new (std::align_val_t{64}) ComplexPrecisionT[length]}; + data_ = allocateMemory(memory_model, length); std::fill(data_.get(), data_.get() + length, ComplexPrecisionT{0.0, 0.0}); data_[0] = {1, 0}; @@ -59,19 +58,14 @@ class StateVectorManagedCPU template explicit StateVectorManagedCPU( - const StateVectorBase &other, - Threading threading = bestThreading(), - CPUMemoryModel memory_model = bestCPUMemoryModel()) - : BaseType(other.getNumQubits(), threading, memory_model) { + const StateVectorCPU &other) + : BaseType(other.getNumQubits(), other.threading(), + other.memoryModel()) { size_t length = BaseType::getLength(); - // NOLINTNEXTLINE(modernize-avoid-c-arrays,hicpp-avoid-c-arrays) - data_ = std::unique_ptr{ - new (std::align_val_t{64}) ComplexPrecisionT[length]}; + data_ = allocateMemory(other.memoryModel(), length); std::copy(other.getData(), other.getData() + length, data_.get()); - - setKernels(BaseType::getNumQubits(), threading, memory_model); } StateVectorManagedCPU(const ComplexPrecisionT 
*other_data, @@ -83,9 +77,7 @@ class StateVectorManagedCPU PL_ABORT_IF_NOT(Util::isPerfectPowerOf2(other_size), "The size of provided data must be a power of 2."); - // NOLINTNEXTLINE(modernize-avoid-c-arrays,hicpp-avoid-c-arrays) - data_ = std::unique_ptr{ - new (std::align_val_t{64}) ComplexPrecisionT[other_size]}; + data_ = allocateMemory(memory_model, other_size); updateData(other_data); } @@ -101,9 +93,7 @@ class StateVectorManagedCPU StateVectorManagedCPU(const StateVectorManagedCPU &rhs) : BaseType(rhs) { size_t length = BaseType::getLength(); - // NOLINTNEXTLINE(modernize-avoid-c-arrays,hicpp-avoid-c-arrays) - data_ = std::unique_ptr{ - new (std::align_val_t{64}) ComplexPrecisionT[length]}; + data_ = allocateMemory(rhs.memory_model_, length); std::copy(rhs.getData(), rhs.getData() + length, data_.get()); } @@ -130,5 +120,4 @@ class StateVectorManagedCPU std::copy(data, data + BaseType::getLength(), data_.get()); } }; - } // namespace Pennylane diff --git a/pennylane_lightning/src/tests/Test_DefaultKernelsForStateVector.cpp b/pennylane_lightning/src/tests/Test_DefaultKernelsForStateVector.cpp index aadc5426d0..663c76e51c 100644 --- a/pennylane_lightning/src/tests/Test_DefaultKernelsForStateVector.cpp +++ b/pennylane_lightning/src/tests/Test_DefaultKernelsForStateVector.cpp @@ -30,3 +30,74 @@ TEST_CASE("Test default kernels for generators are well defined", } }); } + +TEST_CASE("Test unallowed kernel", "[Test_DefaultKernelsForStateVector]") { + using Gates::GateOperation; + using Gates::GeneratorOperation; + using Gates::KernelType; + auto &instance = DefaultKernelsForStateVector::getInstance(); + REQUIRE_THROWS(instance.assignKernelForGate( + GateOperation::PauliX, Threading::SingleThread, + CPUMemoryModel::Unaligned, 0, Util::full_domain(), + KernelType::None)); +} + +TEST_CASE("Test few limiting cases of default kernels", + "[Test_DefaultKernelsForStateVector]") { + auto &instance = DefaultKernelsForStateVector::getInstance(); + SECTION("Single thread, 
large number of qubits") { + // For large N, single thread calls "LM" for all single- and two-qubit + // gates. For three-qubit gates, we use PI. + auto gate_map = instance.getGateKernelMap(24, Threading::SingleThread, + CPUMemoryModel::Unaligned); + Util::for_each_enum( + [&gate_map](Gates::GateOperation gate_op) { + INFO(Util::lookup(Gates::Constant::gate_names, gate_op)); + if (gate_op == Gates::GateOperation::MultiRZ) { + REQUIRE(gate_map[gate_op] == Gates::KernelType::LM); + } else if (Util::lookup(Gates::Constant::gate_wires, gate_op) != + 3) { + REQUIRE(gate_map[gate_op] == Gates::KernelType::LM); + } else { + REQUIRE(gate_map[gate_op] == Gates::KernelType::PI); + } + }); + } + SECTION("Single thread, N = 14") { + // For large N = 14, IsingXX with "PI" is slightly faster + auto gate_map = instance.getGateKernelMap(14, Threading::SingleThread, + CPUMemoryModel::Unaligned); + REQUIRE(gate_map[Gates::GateOperation::IsingXX] == + Gates::KernelType::PI); + } +} + +TEST_CASE("Test priority works", "[Test_DefaultKernelsForStateVector]") { + using Gates::GateOperation; + using Gates::GeneratorOperation; + using Gates::KernelType; + auto &instance = DefaultKernelsForStateVector::getInstance(); + SECTION("Test assignKernelForGate") { + auto original_kernel = instance.getGateKernelMap( + 24, Threading::SingleThread, + CPUMemoryModel::Unaligned)[GateOperation::PauliX]; + + instance.assignKernelForGate( + GateOperation::PauliX, Threading::SingleThread, + CPUMemoryModel::Unaligned, 100, Util::full_domain(), + KernelType::PI); + + REQUIRE(instance.getGateKernelMap( + 24, Threading::SingleThread, + CPUMemoryModel::Unaligned)[GateOperation::PauliX] == + KernelType::PI); + + instance.removeKernelForGenerator(GateOperation::PauliX, + Threading::SingleThread, + CPUMemoryModel::Unaligned, 100); + REQUIRE(instance.getGateKernelMap( + 24, Threading::SingleThread, + CPUMemoryModel::Unaligned)[GateOperation::PauliX] == + original_kernel); + } +} diff --git 
a/pennylane_lightning/src/tests/Test_DynamicDispatcher.cpp b/pennylane_lightning/src/tests/Test_DynamicDispatcher.cpp index 0146f99c35..3511a12da9 100644 --- a/pennylane_lightning/src/tests/Test_DynamicDispatcher.cpp +++ b/pennylane_lightning/src/tests/Test_DynamicDispatcher.cpp @@ -81,12 +81,10 @@ constexpr void testAllGatesForKernelIter(RandomEngine &re, if constexpr (idx < static_cast(GateOperation::END)) { constexpr auto gate_op = static_cast(idx); - if constexpr (gate_op != GateOperation::Matrix) { // ignore Matrix - for (size_t num_qubits = 3; num_qubits <= max_num_qubits; - num_qubits++) { - testDispatchForKernel:: - template test(re, num_qubits); - } + for (size_t num_qubits = 3; num_qubits <= max_num_qubits; + num_qubits++) { + testDispatchForKernel:: + template test(re, num_qubits); } testAllGatesForKernelIter::value); - if constexpr (gate_op != GateOperation::Matrix) { - const auto all_wires = crateAllWires(num_qubits, gate_op, true); - for (const auto &wires : all_wires) { - const auto params = createParams(gate_op); - const auto gate_name = lookup(Constant::gate_names, gate_op); - DYNAMIC_SECTION( - "Test gate " - << gate_name - << " with inverse = false") { // Test with inverse = false - const auto results = Util::tuple_to_array( - applyGateForImplemetingKernels( - ini, num_qubits, wires, false, params, - std::make_index_sequence()>())); - - for (size_t i = 0; i < results.size() - 1; i++) { - REQUIRE(results[i] == - PLApprox(results[i + 1]).margin(1e-7)); - } + const auto all_wires = crateAllWires(num_qubits, gate_op, true); + for (const auto &wires : all_wires) { + const auto params = createParams(gate_op); + const auto gate_name = lookup(Constant::gate_names, gate_op); + DYNAMIC_SECTION( + "Test gate " + << gate_name + << " with inverse = false") { // Test with inverse = false + const auto results = Util::tuple_to_array( + applyGateForImplemetingKernels( + ini, num_qubits, wires, false, params, + std::make_index_sequence()>())); + + for (size_t i = 
0; i < results.size() - 1; i++) { + REQUIRE(results[i] == PLApprox(results[i + 1]).margin(1e-7)); } + } - DYNAMIC_SECTION( - "Test gate " - << gate_name - << " with inverse = true") { // Test with inverse = true - const auto results = Util::tuple_to_array( - applyGateForImplemetingKernels( - ini, num_qubits, wires, true, params, - std::make_index_sequence()>())); - - for (size_t i = 0; i < results.size() - 1; i++) { - REQUIRE(results[i] == - PLApprox(results[i + 1]).margin(1e-7)); - } + DYNAMIC_SECTION("Test gate " + << gate_name + << " with inverse = true") { // Test with inverse = true + const auto results = Util::tuple_to_array( + applyGateForImplemetingKernels( + ini, num_qubits, wires, true, params, + std::make_index_sequence()>())); + + for (size_t i = 0; i < results.size() - 1; i++) { + REQUIRE(results[i] == PLApprox(results[i + 1]).margin(1e-7)); } } } diff --git a/pennylane_lightning/src/tests/Test_GateImplementations_Inverse.cpp b/pennylane_lightning/src/tests/Test_GateImplementations_Inverse.cpp index 19ffb8535b..4869678201 100644 --- a/pennylane_lightning/src/tests/Test_GateImplementations_Inverse.cpp +++ b/pennylane_lightning/src/tests/Test_GateImplementations_Inverse.cpp @@ -26,29 +26,24 @@ using namespace Pennylane::Gates; template void testInverseKernelGate(RandomEngine &re, size_t num_qubits) { - if constexpr (gate_op != GateOperation::Matrix) { - constexpr auto gate_name = static_lookup(Constant::gate_names); - DYNAMIC_SECTION("Test inverse of " << gate_name << " for kernel " - << GateImplementation::name) { - const auto ini_st = createRandomState(re, num_qubits); + constexpr auto gate_name = static_lookup(Constant::gate_names); + DYNAMIC_SECTION("Test inverse of " << gate_name << " for kernel " + << GateImplementation::name) { + const auto ini_st = createRandomState(re, num_qubits); - auto st = ini_st; + auto st = ini_st; - const auto func_ptr = - GateOpToMemberFuncPtr::value; + const auto func_ptr = + GateOpToMemberFuncPtr::value; - const auto 
wires = createWires(gate_op, num_qubits); - const auto params = createParams(gate_op); + const auto wires = createWires(gate_op, num_qubits); + const auto params = createParams(gate_op); - callGateOps(func_ptr, st.data(), num_qubits, wires, false, params); - callGateOps(func_ptr, st.data(), num_qubits, wires, true, params); + callGateOps(func_ptr, st.data(), num_qubits, wires, false, params); + callGateOps(func_ptr, st.data(), num_qubits, wires, true, params); - REQUIRE(st == PLApprox(ini_st).margin(1e-7)); - } - } else { - static_cast(re); - static_cast(num_qubits); + REQUIRE(st == PLApprox(ini_st).margin(1e-7)); } } diff --git a/pennylane_lightning/src/tests/Test_GateImplementations_Matrix.cpp b/pennylane_lightning/src/tests/Test_GateImplementations_Matrix.cpp index dfda96073f..72eba17f63 100644 --- a/pennylane_lightning/src/tests/Test_GateImplementations_Matrix.cpp +++ b/pennylane_lightning/src/tests/Test_GateImplementations_Matrix.cpp @@ -13,20 +13,46 @@ using ApplyMatrixType = void (*)(std::complex *, size_t, const std::vector &, bool); template -struct IsApplyMatrixDefined { +struct IsApplySingleQubitOpDefined { constexpr static bool value = false; }; template -struct IsApplyMatrixDefined< +struct IsApplySingleQubitOpDefined< PrecisionT, GateImplementation, std::enable_if_t< std::is_pointer_v>( - &GateImplementation::template applyMatrix))>>> { + &GateImplementation::template applySingleQubitOp))>>> { constexpr static bool value = true; }; +template +struct IsApplyTwoQubitOpDefined { + constexpr static bool value = false; +}; template -void testApplyMatrix() { +struct IsApplyTwoQubitOpDefined< + PrecisionT, GateImplementation, + std::enable_if_t< + std::is_pointer_v>( + &GateImplementation::template applyTwoQubitOp))>>> { + constexpr static bool value = true; +}; + +template +struct IsApplyMultiQubitOpDefined { + constexpr static bool value = false; +}; +template +struct IsApplyMultiQubitOpDefined< + PrecisionT, GateImplementation, + std::enable_if_t< + 
std::is_pointer_v>( + &GateImplementation::template applyMultiQubitOp))>>> { + constexpr static bool value = true; +}; + +template +void testApplySingleQubitOp() { using ComplexPrecisionT = std::complex; DYNAMIC_SECTION(GateImplementation::name @@ -80,8 +106,8 @@ void testApplyMatrix() { }; auto st = ini_st; - GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), - wires, false); + GateImplementation::applySingleQubitOp(st.data(), num_qubits, + matrix.data(), wires, false); REQUIRE(st == PLApprox(expected).margin(1e-5)); } @@ -136,8 +162,8 @@ void testApplyMatrix() { }; auto st = ini_st; - GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), - wires, false); + GateImplementation::applySingleQubitOp(st.data(), num_qubits, + matrix.data(), wires, false); REQUIRE(st == PLApprox(expected).margin(1e-5)); } @@ -192,10 +218,15 @@ void testApplyMatrix() { }; auto st = ini_st; - GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), - wires, false); + GateImplementation::applySingleQubitOp(st.data(), num_qubits, + matrix.data(), wires, false); REQUIRE(st == PLApprox(expected).margin(1e-5)); } +} + +template +void testApplyTwoQubitOp() { + using ComplexPrecisionT = std::complex; DYNAMIC_SECTION(GateImplementation::name << ", Matrix0,1 - " << PrecisionToName::value) { @@ -260,8 +291,8 @@ void testApplyMatrix() { }; auto st = ini_st; - GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), - wires, false); + GateImplementation::applyTwoQubitOp(st.data(), num_qubits, + matrix.data(), wires, false); REQUIRE(st == PLApprox(expected).margin(1e-5)); } @@ -328,10 +359,15 @@ void testApplyMatrix() { }; auto st = ini_st; - GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), - wires, false); + GateImplementation::applyTwoQubitOp(st.data(), num_qubits, + matrix.data(), wires, false); REQUIRE(st == PLApprox(expected).margin(1e-5)); } +} + +template +void testApplyMultiQubitOp() { + using ComplexPrecisionT = 
std::complex; DYNAMIC_SECTION(GateImplementation::name << ", Matrix1,2,3 - " @@ -445,8 +481,8 @@ void testApplyMatrix() { }; auto st = ini_st; - GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), - wires, false); + GateImplementation::applyMultiQubitOp(st.data(), num_qubits, + matrix.data(), wires, false); REQUIRE(st == PLApprox(expected).margin(1e-5)); } @@ -754,25 +790,57 @@ void testApplyMatrix() { }; auto st = ini_st; - GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), - wires, false); + GateImplementation::applyMultiQubitOp(st.data(), num_qubits, + matrix.data(), wires, false); REQUIRE(st == PLApprox(expected).margin(1e-5)); } } template -void testApplyMatrixForKernels() { +void testApplySingleQubitOpForKernels() { + if constexpr (!std::is_same_v) { + using GateImplementation = typename TypeList::Type; + + if constexpr (IsApplySingleQubitOpDefined::value) { + testApplySingleQubitOp(); + } else { + SUCCEED( + "Member function applySingleQubitOp is not defined in kernel" + << GateImplementation::name); + } + testApplySingleQubitOpForKernels(); + } +} + +template +void testApplyTwoQubitOpForKernels() { if constexpr (!std::is_same_v) { using GateImplementation = typename TypeList::Type; - if constexpr (IsApplyMatrixDefined::value) { - testApplyMatrix(); + if constexpr (IsApplyTwoQubitOpDefined::value) { + testApplyTwoQubitOp(); } else { - SUCCEED("Member function applyMatrix is not defined in kernel" + SUCCEED("Member function applyTwoQubitOp is not defined in kernel" << GateImplementation::name); } - testApplyMatrixForKernels(); + testApplyTwoQubitOpForKernels(); + } +} +template +void testApplyMultiQubitOpForKernels() { + if constexpr (!std::is_same_v) { + using GateImplementation = typename TypeList::Type; + + if constexpr (IsApplyMultiQubitOpDefined::value) { + testApplyMultiQubitOp(); + } else { + SUCCEED("Member function applyMultiQubitOp is not defined in kernel" + << GateImplementation::name); + } + 
testApplyMultiQubitOpForKernels(); } } @@ -780,11 +848,13 @@ TEMPLATE_TEST_CASE("GateImplementation::applyMatrix, inverse = false", "[GateImplementations_Matrix]", float, double) { using PrecisionT = TestType; - testApplyMatrixForKernels(); + testApplySingleQubitOpForKernels(); + testApplyTwoQubitOpForKernels(); + testApplyMultiQubitOpForKernels(); } template -void testApplyMatrixInverse() { +void testApplySingleQubitOpInverse() { std::mt19937 re{1337}; const int num_qubits = 4; @@ -798,10 +868,10 @@ void testApplyMatrixInverse() { const auto matrix = randomUnitary(re, wires.size()); auto st = ini_st; - GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), - wires, false); - GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), - wires, true); + GateImplementation::applySingleQubitOp(st.data(), num_qubits, + matrix.data(), wires, false); + GateImplementation::applySingleQubitOp(st.data(), num_qubits, + matrix.data(), wires, true); REQUIRE(st == PLApprox(ini_st).margin(1e-5)); } @@ -815,10 +885,10 @@ void testApplyMatrixInverse() { const auto matrix = randomUnitary(re, wires.size()); auto st = ini_st; - GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), - wires, false); - GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), - wires, true); + GateImplementation::applySingleQubitOp(st.data(), num_qubits, + matrix.data(), wires, false); + GateImplementation::applySingleQubitOp(st.data(), num_qubits, + matrix.data(), wires, true); REQUIRE(st == PLApprox(ini_st).margin(1e-5)); } @@ -833,10 +903,10 @@ void testApplyMatrixInverse() { const auto matrix = randomUnitary(re, wires.size()); auto st = ini_st; - GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), - wires, false); - GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), - wires, true); + GateImplementation::applySingleQubitOp(st.data(), num_qubits, + matrix.data(), wires, false); + 
GateImplementation::applySingleQubitOp(st.data(), num_qubits, + matrix.data(), wires, true); REQUIRE(st == PLApprox(ini_st).margin(1e-5)); } @@ -851,13 +921,19 @@ void testApplyMatrixInverse() { const auto matrix = randomUnitary(re, wires.size()); auto st = ini_st; - GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), - wires, false); - GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), - wires, true); + GateImplementation::applySingleQubitOp(st.data(), num_qubits, + matrix.data(), wires, false); + GateImplementation::applySingleQubitOp(st.data(), num_qubits, + matrix.data(), wires, true); REQUIRE(st == PLApprox(ini_st).margin(1e-5)); } +} + +template +void testApplyTwoQubitOpInverse() { + std::mt19937 re{1337}; + const int num_qubits = 4; DYNAMIC_SECTION(GateImplementation::name << ", wires = {0,1} - " @@ -869,10 +945,10 @@ void testApplyMatrixInverse() { const auto matrix = randomUnitary(re, wires.size()); auto st = ini_st; - GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), - wires, false); - GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), - wires, true); + GateImplementation::applyTwoQubitOp(st.data(), num_qubits, + matrix.data(), wires, false); + GateImplementation::applyTwoQubitOp(st.data(), num_qubits, + matrix.data(), wires, true); REQUIRE(st == PLApprox(ini_st).margin(1e-5)); } @@ -885,10 +961,10 @@ void testApplyMatrixInverse() { const auto matrix = randomUnitary(re, wires.size()); auto st = ini_st; - GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), - wires, false); - GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), - wires, true); + GateImplementation::applyTwoQubitOp(st.data(), num_qubits, + matrix.data(), wires, false); + GateImplementation::applyTwoQubitOp(st.data(), num_qubits, + matrix.data(), wires, true); REQUIRE(st == PLApprox(ini_st).margin(1e-5)); } @@ -901,13 +977,19 @@ void testApplyMatrixInverse() { const auto matrix = 
randomUnitary(re, wires.size()); auto st = ini_st; - GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), - wires, false); - GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), - wires, true); + GateImplementation::applyTwoQubitOp(st.data(), num_qubits, + matrix.data(), wires, false); + GateImplementation::applyTwoQubitOp(st.data(), num_qubits, + matrix.data(), wires, true); REQUIRE(st == PLApprox(ini_st).margin(1e-5)); } +} + +template +void testApplyMultiQubitOpInverse() { + std::mt19937 re{1337}; + const int num_qubits = 4; DYNAMIC_SECTION(GateImplementation::name << ", wires = {1,2,3} - " @@ -918,10 +1000,10 @@ void testApplyMatrixInverse() { const auto matrix = randomUnitary(re, wires.size()); auto st = ini_st; - GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), - wires, false); - GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), - wires, true); + GateImplementation::applyMultiQubitOp(st.data(), num_qubits, + matrix.data(), wires, false); + GateImplementation::applyMultiQubitOp(st.data(), num_qubits, + matrix.data(), wires, true); REQUIRE(st == PLApprox(ini_st).margin(1e-5)); } @@ -934,27 +1016,61 @@ void testApplyMatrixInverse() { const auto matrix = randomUnitary(re, wires.size()); auto st = ini_st; - GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), - wires, false); - GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), - wires, true); + GateImplementation::applyMultiQubitOp(st.data(), num_qubits, + matrix.data(), wires, false); + GateImplementation::applyMultiQubitOp(st.data(), num_qubits, + matrix.data(), wires, true); REQUIRE(st == PLApprox(ini_st).margin(1e-5)); } } template -void testApplyMatrixInverseForKernels() { +void testApplySingleQubitOpInverseForKernels() { + if constexpr (!std::is_same_v) { + using GateImplementation = typename TypeList::Type; + if constexpr (IsApplySingleQubitOpDefined::value) { + testApplySingleQubitOpInverse(); + 
} else { + SUCCEED( + "Member function applySingleQubitOp is not defined in kernel" + << GateImplementation::name); + } + testApplySingleQubitOpInverseForKernels(); + } +} + +template +void testApplyTwoQubitOpInverseForKernels() { + if constexpr (!std::is_same_v) { + using GateImplementation = typename TypeList::Type; + if constexpr (IsApplyTwoQubitOpDefined::value) { + testApplyTwoQubitOpInverse(); + } else { + SUCCEED("Member function applyTwoQubitOp is not defined in kernel" + << GateImplementation::name); + } + testApplyTwoQubitOpInverseForKernels(); + } +} + +template +void testApplyMultiQubitOpInverseForKernels() { if constexpr (!std::is_same_v) { using GateImplementation = typename TypeList::Type; - if constexpr (IsApplyMatrixDefined::value) { - testApplyMatrixInverse(); + if constexpr (IsApplyMultiQubitOpDefined::value) { + testApplyMultiQubitOpInverse(); } else { - SUCCEED("Member function applyMatrix is not defined in kernel" + SUCCEED("Member function applyMultiQubitOp is not defined in kernel" << GateImplementation::name); } - testApplyMatrixInverseForKernels(); + testApplyMultiQubitOpInverseForKernels(); } } @@ -962,5 +1078,7 @@ TEMPLATE_TEST_CASE("GateImplementation::applyMatrix, inverse = true", "[GateImplementations_Matrix]", float, double) { using PrecisionT = TestType; - testApplyMatrixInverseForKernels(); + testApplySingleQubitOpInverseForKernels(); + testApplyTwoQubitOpInverseForKernels(); + testApplyMultiQubitOpInverseForKernels(); } diff --git a/pennylane_lightning/src/tests/Test_OpToMemberFuncPtr.cpp b/pennylane_lightning/src/tests/Test_OpToMemberFuncPtr.cpp index a46a7387f6..6f3f5bdd4f 100644 --- a/pennylane_lightning/src/tests/Test_OpToMemberFuncPtr.cpp +++ b/pennylane_lightning/src/tests/Test_OpToMemberFuncPtr.cpp @@ -23,11 +23,9 @@ template (GateOperation::END)) { constexpr auto gate_op = static_cast(gate_idx); - if constexpr (gate_op != GateOperation::Matrix) { - static_cast( - GateOpToMemberFuncPtr::value); - } + static_cast( + 
GateOpToMemberFuncPtr::value); return testAllGatesImplementedIter(); } else { @@ -153,8 +151,7 @@ static_assert(testAllGatesImplemeted(), struct ImplementedGates { constexpr static auto value = DummyImplementation::implemented_gates; - constexpr static std::array ignore_list = { - GateOperation::Matrix}; + constexpr static std::array ignore_list = {}; template constexpr static auto func_ptr = diff --git a/pennylane_lightning/src/util/ConstantUtil.hpp b/pennylane_lightning/src/util/ConstantUtil.hpp index 532b49ee01..aec36602e1 100644 --- a/pennylane_lightning/src/util/ConstantUtil.hpp +++ b/pennylane_lightning/src/util/ConstantUtil.hpp @@ -207,4 +207,11 @@ constexpr auto reverse_pairs(const std::array, size> &arr) return Internal::reverse_pairs_helper(arr, std::make_index_sequence{}); } + +constexpr auto constIsPerfectPowerOf2(size_t value) -> bool { + while ((value & 1U) == 0) { + value >>= 1U; + } + return value == 1; +} } // namespace Pennylane::Util diff --git a/pennylane_lightning/src/util/IntegerInterval.hpp b/pennylane_lightning/src/util/IntegerInterval.hpp new file mode 100644 index 0000000000..24f14959b9 --- /dev/null +++ b/pennylane_lightning/src/util/IntegerInterval.hpp @@ -0,0 +1,94 @@ +// Copyright 2022 Xanadu Quantum Technologies Inc. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+/** + * @file + */ +#include +#include +#include +#include + +namespace Pennylane::Util { + +/** + * @brief Define integer interval [min_, max_) + */ +template class IntegerInterval { + private: + static_assert(std::is_integral_v && + std::is_unsigned_v); + + IntegerType min_; + IntegerType max_; + + public: + constexpr IntegerInterval(IntegerType min, IntegerType max) + : min_{min}, max_{max} { + assert(min < max); + } + bool operator()(IntegerType test_val) const { + return (min_ <= test_val) && (test_val < max_); + } + + [[nodiscard]] IntegerType min() const { return min_; } + + [[nodiscard]] IntegerType max() const { return max_; } +}; + +template +auto larger_than(IntegerType from) -> IntegerInterval { + return IntegerInterval{ + from + 1, std::numeric_limits::max()}; +} +template +auto larger_than_equal_to(IntegerType from) -> IntegerInterval { + return IntegerInterval{ + from, std::numeric_limits::max()}; +} +template +auto less_than(IntegerType to) -> IntegerInterval { + return IntegerInterval{0, to}; +} +template +auto less_than_equal_to(IntegerType to) -> IntegerInterval { + return IntegerInterval{0, to + 1}; +} +template +auto in_between_closed(IntegerType from, IntegerType to) + -> IntegerInterval { + return IntegerInterval{from, to + 1}; +} +template +auto full_domain() -> IntegerInterval { + return IntegerInterval{ + 0, std::numeric_limits::max()}; +} + +template +bool is_disjoint(const IntegerInterval &interval1, + const IntegerInterval &interval2) { + return (interval1.max() <= interval2.min()) || + (interval2.max() <= interval1.min()); +} + +template +auto union_interval(const IntegerInterval &interval1, + const IntegerInterval &interval2) + -> IntegerInterval { + return IntegerInterval{ + std::min(interval1.min(), interval2.min()), + std::max(interval1.max(), interval2.max())}; +} + +} // namespace Pennylane::Util diff --git a/pennylane_lightning/src/util/Memory.hpp b/pennylane_lightning/src/util/Memory.hpp index 6a6df1f1ce..223d977c0a 100644 --- 
a/pennylane_lightning/src/util/Memory.hpp +++ b/pennylane_lightning/src/util/Memory.hpp @@ -16,19 +16,12 @@ #include #include +#include "ConstantUtil.hpp" #include "TypeList.hpp" namespace Pennylane { - -constexpr auto constIsPerfectPowerOf2(size_t value) -> bool { - while ((value & 1U) == 0) { - value >>= 1U; - } - return value == 1; -} - template struct AlignedAllocator { - static_assert(constIsPerfectPowerOf2(alignment), + static_assert(Util::constIsPerfectPowerOf2(alignment), "Template parameter alignment must be power of 2."); using value_type = T; @@ -101,4 +94,5 @@ template template using PLAllocator = std::conditional_t, AlignedAllocator>; + } // namespace Pennylane diff --git a/pennylane_lightning/src/util/Util.hpp b/pennylane_lightning/src/util/Util.hpp index ca029c0609..2da023e2ea 100644 --- a/pennylane_lightning/src/util/Util.hpp +++ b/pennylane_lightning/src/util/Util.hpp @@ -404,6 +404,16 @@ auto chunkData(const Container &data, std::size_t num_chunks) // type alias template using remove_cvref_t = typename remove_cvref::type; +/** + * @brief Hash for std::pair + */ +struct PairHash { + template + size_t operator()(const std::pair &p) const { + return std::hash()(p.first) ^ std::hash()(p.second); + } +}; + /** * @brief Iterate over all enum values (if BEGIN and END are defined). 
* diff --git a/tests/test_adjoint_jacobian.py b/tests/test_adjoint_jacobian.py index 8d3d31d62c..4872212506 100644 --- a/tests/test_adjoint_jacobian.py +++ b/tests/test_adjoint_jacobian.py @@ -168,16 +168,16 @@ def test_unsupported_hermitian_expectation(self, dev): ) @pytest.mark.skipif(not lq._CPP_BINARY_AVAILABLE, reason="Lightning binary required") def test_unsupported_complex_type(self, dev): - dev._state = dev._asarray(dev._state, np.complex256) + with pytest.raises(TypeError, match="Unsupported .*"): + dev._state = dev._asarray(dev._state, np.complex256) - with qml.tape.JacobianTape() as tape: - qml.QubitStateVector(np.array([1.0, -1.0]) / np.sqrt(2), wires=0) - qml.RX(0.3, wires=[0]) - qml.expval(qml.PauliZ(0)) + with qml.tape.JacobianTape() as tape: + qml.QubitStateVector(np.array([1.0, -1.0]) / np.sqrt(2), wires=0) + qml.RX(0.3, wires=[0]) + qml.expval(qml.PauliZ(0)) - tape.trainable_params = {1} + tape.trainable_params = {1} - with pytest.raises(TypeError, match="Unsupported complex Type: complex256"): dev.adjoint_jacobian(tape) @pytest.mark.parametrize("theta", np.linspace(-2 * np.pi, 2 * np.pi, 7)) diff --git a/tests/test_apply.py b/tests/test_apply.py index d3ab603077..ee3af3144c 100644 --- a/tests/test_apply.py +++ b/tests/test_apply.py @@ -643,44 +643,6 @@ def test_load_default_qubit_device(self): assert dev.shots is None assert dev.short_name == "lightning.qubit" - @pytest.mark.skipif(not lq._CPP_BINARY_AVAILABLE, reason="Lightning binary required") - def test_load_default_qubit_device_with_valid_kernel(self): - """Test that lightning.qubit works with valid kernel_for_ops argument.""" - for gate in ["PauliX", "CRot", "CSWAP", "Matrix"]: - dev = qml.device("lightning.qubit", kernel_for_ops={gate: "PI"}, wires=2) - - assert dev.num_wires == 2 - assert dev.shots is None - assert dev.short_name == "lightning.qubit" - - @pytest.mark.skipif(not lq._CPP_BINARY_AVAILABLE, reason="Lightning binary required") - def 
test_load_default_qubit_device_with_invalid_kernel(self): - """Test that lightning.qubit raises error for unsupported gate/kernel pair.""" - - for gate in ["PauliX", "CRot", "CSWAP", "Matrix"]: - with pytest.raises( - ValueError, match=f"The given kernel Unknown does not implement {gate} gate." - ): - dev = qml.device("lightning.qubit", kernel_for_ops={gate: "Unknown"}, wires=2) - - @pytest.mark.skipif(not lq._CPP_BINARY_AVAILABLE, reason="Lightning binary required") - def test_load_default_qubit_device_with_invalid_param(self): - """Test that lightning.qubit does not support kernel_for_ops type list.""" - with pytest.raises(ValueError, match=f"Argument kernel_for_ops must be a dictionary."): - dev = qml.device("lightning.qubit", kernel_for_ops=["I am a list"], wires=2) - - @pytest.mark.skipif(not lq._CPP_BINARY_AVAILABLE, reason="Lightning binary required") - def test_all_exported_gates_are_available(self): - """Test all exported gates from lightning_qubit_ops are accessible""" - from pennylane_lightning import lightning_qubit_ops - from pennylane_lightning.lightning_qubit_ops import StateVectorC128 as SV - - for kernel, gate_op in lightning_qubit_ops.EXPORTED_KERNEL_OPS: - if gate_op != "Matrix": - assert getattr(SV, f"{gate_op}_{kernel}", None) is not None - else: - assert getattr(SV, f"applyMatrix_{kernel}", None) is not None - def test_no_backprop(self): """Test that lightning.qubit does not support the backprop differentiation method.""" diff --git a/tests/test_array.py b/tests/test_array.py new file mode 100644 index 0000000000..b154ac2a4d --- /dev/null +++ b/tests/test_array.py @@ -0,0 +1,34 @@ +# Copyright 2022 Xanadu Quantum Technologies Inc. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Unit tests for the :mod:`pennylane_lightning.LightningQubit` device. +""" +import numpy as np +import pennylane as qml +import pytest +from pennylane import DeviceError + +from pennylane_lightning.lightning_qubit import CPP_BINARY_AVAILABLE + +try: + from pennylane_lightning.lightning_qubit_ops import allocate_aligned_array +except (ImportError, ModuleNotFoundError): + pytest.skip("No binary module found. Skipping.", allow_module_level=True) + + +@pytest.mark.skipif(not CPP_BINARY_AVAILABLE, reason="Lightning binary required") +@pytest.mark.parametrize("dt", [np.dtype(np.complex64), np.dtype(np.complex128)]) +def test_allocate_aligned_array(dt): + arr = allocate_aligned_array(1024, dt) + assert arr.dtype == dt diff --git a/tests/test_serialize.py b/tests/test_serialize.py index f62795f4d5..2cff781428 100644 --- a/tests/test_serialize.py +++ b/tests/test_serialize.py @@ -22,7 +22,6 @@ _serialize_obs, _serialize_ops, _obs_has_kernel, - _is_lightning_gate, ) import pytest from unittest import mock @@ -36,45 +35,6 @@ pytest.skip("No binary module found. 
Skipping.", allow_module_level=True) -class TestIsLightningGate: - """Tests for the _is_lightning_gate""" - - def test_gates(self): - """Test if returns true for some gates""" - for gate in [ - "PauliX", - "PauliY", - "PauliZ", - "Hadamard", - "S", - "T", - "PhaseShift", - "RX", - "RY", - "RZ", - "Rot", - "CNOT", - "CY", - "CZ", - "SWAP", - "ControlledPhaseShift", - "CRX", - "CRY", - "CRZ", - "CRot", - "Toffoli", - "CSWAP", - ]: - assert _is_lightning_gate(gate) - - def test_matrix(self): - assert not _is_lightning_gate("Matrix") - - def test_non_gates(self): - for gate in ["Quantum", "computing", "in", "2022", "with", "Pennylane", "Lightning"]: - assert not _is_lightning_gate(gate) - - class TestObsHasKernel: """Tests for the _obs_has_kernel function""" diff --git a/tests/test_vjp.py b/tests/test_vjp.py index 3ab352a6d9..2aa97c6faa 100644 --- a/tests/test_vjp.py +++ b/tests/test_vjp.py @@ -39,12 +39,14 @@ def dev(self): not hasattr(np, "complex256"), reason="Numpy only defines complex256 in Linux-like system" ) def test_unsupported_complex_type(self, dev): - dev._state = dev._asarray(dev._state, np.complex256) + with pytest.raises(TypeError, match="Unsupported .*"): + dev._state = dev._asarray(dev._state, np.complex256) - dy = np.array([[1.0, 2.0], [3.0, 4.0]]) - jac = np.array([[[1.0, 0.1, 0.2], [0.2, 0.6, 0.1]], [[0.4, -0.7, 1.2], [-0.5, -0.6, 0.7]]]) + dy = np.array([[1.0, 2.0], [3.0, 4.0]]) + jac = np.array( + [[[1.0, 0.1, 0.2], [0.2, 0.6, 0.1]], [[0.4, -0.7, 1.2], [-0.5, -0.6, 0.7]]] + ) - with pytest.raises(TypeError, match="Unsupported complex Type: complex256"): dev.compute_vjp(dy, jac) @pytest.mark.parametrize("C", [np.complex64, np.complex128]) @@ -120,21 +122,21 @@ def dev(self): not hasattr(np, "complex256"), reason="Numpy only defines complex256 in Linux-like system" ) def test_unsupported_complex_type(self, dev): - dev._state = dev._asarray(dev._state, np.complex256) + with pytest.raises(TypeError, match="Unsupported .*"): + dev._state = 
dev._asarray(dev._state, np.complex256) - x, y, z = [0.5, 0.3, -0.7] + x, y, z = [0.5, 0.3, -0.7] - with qml.tape.JacobianTape() as tape: - qml.RX(0.4, wires=[0]) - qml.Rot(x, y, z, wires=[0]) - qml.RY(-0.2, wires=[0]) - qml.expval(qml.PauliZ(0)) + with qml.tape.JacobianTape() as tape: + qml.RX(0.4, wires=[0]) + qml.Rot(x, y, z, wires=[0]) + qml.RY(-0.2, wires=[0]) + qml.expval(qml.PauliZ(0)) - tape.trainable_params = {1, 2, 3} + tape.trainable_params = {1, 2, 3} - dy = np.array([1.0]) + dy = np.array([1.0]) - with pytest.raises(TypeError, match="Unsupported complex Type: complex256"): dev.vjp(tape, dy)(tape) @pytest.mark.parametrize("C", [np.complex64, np.complex128]) @@ -468,26 +470,26 @@ def dev(self): not hasattr(np, "complex256"), reason="Numpy only defines complex256 in Linux-like system" ) def test_unsupported_complex_type(self, dev): - dev._state = dev._asarray(dev._state, np.complex256) + with pytest.raises(TypeError, match="Unsupported .*"): + dev._state = dev._asarray(dev._state, np.complex256) - with qml.tape.QuantumTape() as tape1: - qml.RX(0.4, wires=0) - qml.CNOT(wires=[0, 1]) - qml.expval(qml.PauliZ(0)) + with qml.tape.QuantumTape() as tape1: + qml.RX(0.4, wires=0) + qml.CNOT(wires=[0, 1]) + qml.expval(qml.PauliZ(0)) - with qml.tape.JacobianTape() as tape2: - qml.RX(0.4, wires=0) - qml.RX(0.6, wires=0) - qml.CNOT(wires=[0, 1]) - qml.expval(qml.PauliZ(0)) + with qml.tape.JacobianTape() as tape2: + qml.RX(0.4, wires=0) + qml.RX(0.6, wires=0) + qml.CNOT(wires=[0, 1]) + qml.expval(qml.PauliZ(0)) - tape1.trainable_params = {0} - tape2.trainable_params = {0, 1} + tape1.trainable_params = {0} + tape2.trainable_params = {0, 1} - tapes = [tape1, tape2] - dys = [np.array([1.0]), np.array([1.0])] + tapes = [tape1, tape2] + dys = [np.array([1.0]), np.array([1.0])] - with pytest.raises(TypeError, match="Unsupported complex Type: complex256"): dev.batch_vjp(tapes, dys) @pytest.mark.parametrize("C", [np.complex64, np.complex128]) From 
12299f625246de099f8d966113168e63cec61de1 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Tue, 1 Mar 2022 20:30:11 -0500 Subject: [PATCH 04/94] Fix examples --- .../src/examples/CMakeLists.txt | 27 ++- .../src/examples/benchmark_gate.cpp | 204 ++++++++++++++++ .../src/examples/benchmark_gate_list.cpp | 223 ----------------- .../src/examples/benchmark_generator.cpp | 202 ++++++++++++++++ .../src/examples/benchmark_matrix.cpp | 144 +++++++++++ .../src/examples/benchmark_multi_rz.cpp | 77 ------ .../src/examples/benchmark_operation.cpp | 210 ---------------- .../src/examples/gate_benchmark_oplist.cpp | 227 ------------------ ...ot_gate_benchmark.py => plot_benchmark.py} | 0 .../src/examples/run_benchmark.sh | 39 +++ .../src/examples/run_gate_benchmark.sh | 55 ----- pennylane_lightning/src/examples/test.sh | 1 + .../cpu_kernels/GateImplementationsLM.hpp | 60 +++-- .../cpu_kernels/GateImplementationsPI.hpp | 8 +- .../src/simulator/DynamicDispatcher.hpp | 10 - .../src/simulator/StateVectorBase.hpp | 107 ++++++++- pennylane_lightning/src/tests/TestHelpers.hpp | 66 +---- .../src/tests/Test_AdjDiff.cpp | 14 +- .../tests/Test_GateImplementations_Matrix.cpp | 1 + .../src/tests/Test_Internal.cpp | 32 --- pennylane_lightning/src/tests/Test_Util.cpp | 32 +++ .../src/util/LinearAlgebra.hpp | 67 ++++++ 22 files changed, 866 insertions(+), 940 deletions(-) create mode 100644 pennylane_lightning/src/examples/benchmark_gate.cpp delete mode 100644 pennylane_lightning/src/examples/benchmark_gate_list.cpp create mode 100644 pennylane_lightning/src/examples/benchmark_generator.cpp create mode 100644 pennylane_lightning/src/examples/benchmark_matrix.cpp delete mode 100644 pennylane_lightning/src/examples/benchmark_multi_rz.cpp delete mode 100644 pennylane_lightning/src/examples/benchmark_operation.cpp delete mode 100644 pennylane_lightning/src/examples/gate_benchmark_oplist.cpp rename pennylane_lightning/src/examples/{plot_gate_benchmark.py => plot_benchmark.py} (100%) create mode 
100755 pennylane_lightning/src/examples/run_benchmark.sh delete mode 100755 pennylane_lightning/src/examples/run_gate_benchmark.sh create mode 100644 pennylane_lightning/src/examples/test.sh diff --git a/pennylane_lightning/src/examples/CMakeLists.txt b/pennylane_lightning/src/examples/CMakeLists.txt index d58bcce5ba..addb786ad6 100644 --- a/pennylane_lightning/src/examples/CMakeLists.txt +++ b/pennylane_lightning/src/examples/CMakeLists.txt @@ -21,25 +21,28 @@ target_link_libraries(lightning_examples INTERFACE lightning_compile_options lightning_simulator lightning_utils) -add_executable(benchmark_operation benchmark_operation.cpp) -target_link_libraries(benchmark_operation PRIVATE lightning_examples) +add_executable(benchmark_gate benchmark_gate.cpp) +target_link_libraries(benchmark_gate PRIVATE lightning_examples) -add_executable(benchmark_operation_float benchmark_operation.cpp) -target_compile_options(benchmark_operation_float PRIVATE "-DUSE_SINGLE_PRECISION") -target_link_libraries(benchmark_operation_float PRIVATE lightning_examples) +add_executable(benchmark_generator benchmark_generator.cpp) +target_link_libraries(benchmark_generator PRIVATE lightning_examples) -add_executable(benchmark_multi_rz benchmark_multi_rz.cpp) -target_link_libraries(benchmark_multi_rz PRIVATE lightning_examples) +add_executable(benchmark_matrix benchmark_matrix.cpp) +target_link_libraries(benchmark_matrix PRIVATE lightning_examples) + +# add_executable(benchmark_operation_float benchmark_operation.cpp) +# target_compile_options(benchmark_operation_float PRIVATE "-DUSE_SINGLE_PRECISION") +# target_link_libraries(benchmark_operation_float PRIVATE lightning_examples) configure_file("compiler_info.in" "compiler_info.txt") -add_custom_command(TARGET benchmark_operation POST_BUILD +add_custom_command(TARGET benchmark_gate POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy - ${PROJECT_SOURCE_DIR}/run_gate_benchmark.sh - ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/run_gate_benchmark.sh + 
${PROJECT_SOURCE_DIR}/run_benchmark.sh + ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/run_benchmark.sh COMMAND ${CMAKE_COMMAND} -E create_symlink - ${PROJECT_SOURCE_DIR}/plot_gate_benchmark.py - ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/plot_gate_benchmark.py + ${PROJECT_SOURCE_DIR}/plot_benchmark.py + ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/plot_benchmark.py COMMAND ${CMAKE_COMMAND} -E rename ${CMAKE_CURRENT_BINARY_DIR}/compiler_info.txt ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/compiler_info.txt) diff --git a/pennylane_lightning/src/examples/benchmark_gate.cpp b/pennylane_lightning/src/examples/benchmark_gate.cpp new file mode 100644 index 0000000000..00545b1988 --- /dev/null +++ b/pennylane_lightning/src/examples/benchmark_gate.cpp @@ -0,0 +1,204 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "Constant.hpp" +#include "ExampleUtil.hpp" +#include "StateVectorManagedCPU.hpp" + +#ifdef USE_SINGLE_PRECISION +using PrecisionT = float; +#pragma message "Using single precision" +#else +using PrecisionT = double; +#endif + +using namespace Pennylane; +using namespace Pennylane::Gates; +using namespace Pennylane::Util; + +struct GateDesc { + std::string name; + std::vector wires; + bool inverse; + std::vector params; + + template + GateDesc(Arg0 &&arg0, Arg1 &&arg1, Arg2 &&arg2, Arg3 &&arg3) + : name{std::forward(arg0)}, wires{std::forward(arg1)}, + inverse{std::forward(arg2)}, params{std::forward(arg3)} {} +}; + +std::ostream &operator<<(std::ostream &os, GateDesc &desc) { + os << desc.name << ", " << desc.wires << "," << desc.inverse << "," + << desc.params << std::endl; + return os; +} + +template +auto generateGateSequence(RandomEngine &re, const std::string &gate_name, + const size_t num_reps, const size_t num_qubits, + const size_t num_wires_for_multi_qubit) + -> std::vector { + using Gates::Constant::multi_qubit_gates; + + const GateOperation gate_op = Util::lookup( + Util::reverse_pairs(Constant::gate_names), std::string_view(gate_name)); + const 
size_t num_wires = [=]() { + if (Util::array_has_elt(multi_qubit_gates, gate_op)) { + // if multi qubit gate + return num_wires_for_multi_qubit; + } + return Util::lookup(Constant::gate_wires, gate_op); + }(); + const size_t num_params = Util::lookup(Constant::gate_num_params, gate_op); + + std::vector gate_seq; + std::uniform_int_distribution inverse_dist(0, 1); + std::uniform_real_distribution param_dist(0.0, 2 * M_PI); + + for (uint32_t k = 0; k < num_reps; k++) { + std::vector params; + params.reserve(num_params); + + bool inverse = static_cast(inverse_dist(re)); + auto wires = generateNeighboringWires(re, num_qubits, num_wires); + + for (size_t idx = 0; idx < num_params; idx++) { + params.emplace_back(param_dist(re)); + } + + gate_seq.emplace_back(gate_name, std::move(wires), inverse, + std::move(params)); + } + return gate_seq; +} + +double benchmarkGate(KernelType kernel, const size_t num_qubits, + const std::vector &gate_seq) { + // Run benchmark. Total num_reps number of gates is used. 
+ StateVectorManagedCPU svdat{num_qubits}; + + std::chrono::time_point t_start = + std::chrono::high_resolution_clock::now(); + for (const auto &gate : gate_seq) { + svdat.applyOperation(kernel, gate.name, gate.wires, gate.inverse, + gate.params); + } + std::chrono::time_point t_end = + std::chrono::high_resolution_clock::now(); + + return std::chrono::duration(t_end - t_start).count(); +} + +template +double runBenchmarkGate(RandomEngine &re, KernelType kernel, + const std::string &gate_name, size_t num_reps, + size_t num_qubits, size_t num_wires_for_multi_qubit) { + auto gate_seq = generateGateSequence(re, gate_name, num_reps, num_qubits, + num_wires_for_multi_qubit); + + // Log generated sequence if LOG is turned on + const char *env_p = std::getenv("LOG"); + try { + if (env_p != nullptr && std::stoi(env_p) != 0) { + for (const auto &gate : gate_seq) { + std::cerr << gate.name << ", " << gate.wires << "," + << gate.inverse << "," << gate.params << std::endl; + } + } + } catch (std::exception &e) { + // Just do not print log + } + + return benchmarkGate(kernel, num_qubits, gate_seq); +} + +/** + * @brief Benchmark Pennylane-Lightning for a given generator + * + * @param argc Number of arguments + * @param argv Command line arguments + * @return Returns 0 is completed successfully + */ +int main(int argc, char *argv[]) { + namespace Constant = Gates::Constant; + // Handle input + if (argc != 5 && argc != 6) { // NOLINT(readability-magic-numbers) + std::cerr + << "Wrong number of inputs. User provided " << argc - 1 + << " inputs. 
\n" + << "Usage: " + std::string(argv[0]) + + " num_reps num_qubits kernel [generator|gate] [num_wires]\n" + "Examples: \n" + << "\t" << argv[0] << " 1000 10 PI GeneratorCRX\n" + << "\t" << argv[0] << " 1000 10 LM CRX\n" + << "\t" << argv[0] << " 1000 10 LM MutliRZ 3\n"; + return -1; + } + + size_t num_reps; + size_t num_qubits; + + try { + num_reps = std::stoi(argv[1]); + num_qubits = std::stoi(argv[2]); + } catch (std::exception &e) { + std::cerr << "Arguments num_reps and num_qubits must be integers." + << std::endl; + return -1; + } + + std::string_view kernel_name = argv[3]; + KernelType kernel = string_to_kernel(kernel_name); + if (kernel == KernelType::None) { + std::cerr << "Kernel " << kernel_name << " is unknown." << std::endl; + return 1; + } + + std::string_view gate_name = argv[4]; + if (!Util::array_has_elt(Util::second_elts_of(Constant::gate_names), + gate_name)) { + std::cerr << "Unknown gate name " << gate_name << " is provided" + << std::endl; + return 1; + } + + Gates::GateOperation gate_op = + Util::lookup(Util::reverse_pairs(Constant::gate_names), gate_name); + + size_t num_wires_for_multi_qubit = 0; + if (Util::array_has_elt(Constant::multi_qubit_gates, gate_op)) { + // User provided a multi-qubit gates + if (argc != 6) { + std::cerr << "One should provide the number of wires when using " + "multi qubit gates." 
+ << std::endl; + return 1; + } + + try { + num_wires_for_multi_qubit = std::stoi(argv[5]); + } catch (std::exception &e) { + std::cerr << "Number of wires must be an integer" << std::endl; + return 1; + } + } + + std::random_device rd; + std::mt19937 re(rd()); + + double walltime = + runBenchmarkGate(re, kernel, std::string(gate_name), num_reps, + num_qubits, num_wires_for_multi_qubit); + + // Output walltime in csv format (Num Qubits, Time (milliseconds)) + std::cout << num_qubits << ", " << walltime / static_cast(num_reps) + << std::endl; + return 0; +} diff --git a/pennylane_lightning/src/examples/benchmark_gate_list.cpp b/pennylane_lightning/src/examples/benchmark_gate_list.cpp deleted file mode 100644 index 5910ad0884..0000000000 --- a/pennylane_lightning/src/examples/benchmark_gate_list.cpp +++ /dev/null @@ -1,223 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - -#include "Constant.hpp" -#include "ExampleUtil.hpp" -#include "StateVectorManaged.hpp" - -using namespace Pennylane; -using namespace Pennylane::Gates; -using namespace Pennylane::Util; - -std::string_view strip(std::string_view str) { - auto start = str.find_first_not_of(" \t"); - auto end = str.find_last_not_of(" \t"); - return str.substr(start, end - start + 1); -} - -struct GateDesc { - size_t n_wires; // number of wires the gate applies to - size_t n_params; // number of parameters the gate requires -}; - -std::vector> -parseGateLists(std::string_view arg) { - namespace Constant = Gates::Constant; - std::map available_gates_wires; - - for (const auto &[gate_op, gate_name] : Constant::gate_names) { - if (!array_has_elt(Constant::multi_qubit_gates, gate_op)) { - // We do not support multi qubit gates yet - size_t n_wires = Util::lookup(Constant::gate_wires, gate_op); - size_t n_params = Util::lookup(Constant::gate_num_params, gate_op); - available_gates_wires.emplace(gate_name, - GateDesc{n_wires, n_params}); - } - } - - if (arg.empty()) { - return {}; - } 
- - std::vector> ops; - - if (auto pos = arg.find_first_of('['); pos != std::string_view::npos) { - // arg is a list "[...]" - auto start = pos + 1; - auto end = arg.find_last_of(']'); - if (end == std::string_view::npos) { - throw std::invalid_argument( - "Argument must contain operators within square brackets []."); - } - arg = arg.substr(start, end - start); - } - - size_t start; - size_t end = 0; - while ((start = arg.find_first_not_of(',', end)) != std::string::npos) { - end = arg.find(',', start); - auto op_name = strip(arg.substr(start, end - start)); - - auto iter = available_gates_wires.find(std::string(op_name)); - - if (iter == available_gates_wires.end()) { - std::ostringstream ss; - ss << "Given gate " << op_name - << " is not availabe"; // TODO: Change to std::format in C++20 - throw std::invalid_argument(ss.str()); - } - ops.emplace_back(*iter); - } - return ops; -} - -/** - * @brief Benchmark Pennylane-Lightning for a given gate set - * - * Example usage: - * - * $ gate_benchmark_oplist 10 22 # Benchmark using 10 random gates (sampled - * evenly from all possible gates) for 22 qubits - * $ gate_benchmark_oplist 100 20 [PauliX, CNOT] # Benchmark using 100 - * random gates (where each gate is PauliX or CNOT) for 20 qubits - * - * The whole supported gates are PauliX, PauliY, PauliZ, Hadamard, S, T, RX, RY, - * RZ, Rot, PhaseShift, CNOT, SWAP, ControlledPhaseShift, CRX, CRY, CRZ, CRot, - * Toffoli and CSWAP. - * - * @param argc Number of arguments - * @param argv Command line arguments - * @return Returns 0 is completed successfully - */ -int main(int argc, char *argv[]) { - using TestType = double; - - // Handle input - if (argc < 4) { - std::cerr << "Wrong number of inputs. User provided " << argc - 1 - << " inputs. 
" - << "Usage: " + std::string(argv[0]) + - " num_gate_reps num_qubits kernel [gate_lists]\n" - "\tExample: " - << argv[0] << " 1000 10 PI [PauliX, CNOT]" - << std::endl; // Change to std::format in C++20 - return -1; - } - - size_t num_gate_reps; - size_t num_qubits; - - try { - num_gate_reps = std::stoi(argv[1]); - num_qubits = std::stoi(argv[2]); - } catch (std::exception &e) { - std::cerr << "Arguments num_gate_reps and num_qubits must be integers." - << std::endl; - return -1; - } - - std::string_view kernel_name = argv[3]; - KernelType kernel = string_to_kernel(kernel_name); - if (kernel == KernelType::None) { - std::cerr << "Kernel " << kernel_name << " is unknown." << std::endl; - return 1; - } - - // Gate list is provided - std::string op_list_s; - { - std::ostringstream ss; - for (int idx = 4; idx < argc; idx++) { - ss << argv[idx] << " "; - } - op_list_s = ss.str(); - } - - std::vector> op_list; - try { - op_list = parseGateLists(op_list_s); - } catch (std::exception &e) { - std::cerr << e.what() << std::endl; - return 1; - } - - if (op_list.empty()) { - std::cerr << "Please provide a gate list." 
<< std::endl; - return 1; - } - - // Generate random gate sequences - std::random_device rd; - std::mt19937 re(rd()); - - std::vector random_gate_names; - std::vector> random_gate_wires; - std::vector random_inverses; - std::vector> random_gate_parameters; - - std::uniform_int_distribution gate_dist(0, op_list.size() - 1); - std::uniform_int_distribution inverse_dist(0, 1); - std::uniform_real_distribution param_dist(0.0, 2 * M_PI); - std::uniform_int_distribution wire_dist(0, num_qubits - 1); - - auto gen_param = [¶m_dist, &re]() { return param_dist(re); }; - - for (uint32_t k = 0; k < num_gate_reps; k++) { - const auto &[op_name, gate_desc] = op_list[gate_dist(re)]; - - std::vector gate_params(gate_desc.n_params, 0.0); - std::generate(gate_params.begin(), gate_params.end(), gen_param); - - random_gate_names.emplace_back(op_name); - random_inverses.emplace_back(static_cast(inverse_dist(re))); - // random_gate_wires.emplace_back(generateDistinctWires(re, num_qubits, - // gate_desc.n_wires)); - random_gate_wires.emplace_back( - generateNeighboringWires(re, num_qubits, gate_desc.n_wires)); - random_gate_parameters.emplace_back(std::move(gate_params)); - } - - // Log generated sequence if LOG is turned on - const char *env_p = std::getenv("LOG"); - try { - if (env_p != nullptr && std::stoi(env_p) != 0) { - for (size_t gate_rep = 0; gate_rep < num_gate_reps; gate_rep++) { - std::cerr << random_gate_names[gate_rep] << ", " - << random_gate_wires[gate_rep] << ", " - << random_gate_parameters[gate_rep] << std::endl; - } - } - } catch (std::exception &e) { - // Just do not print log - } - - // Run benchmark. Total num_gate_reps number of gates is used. 
- Pennylane::StateVectorManaged svdat{num_qubits}; - std::chrono::time_point t_start; - std::chrono::time_point t_end; - t_start = std::chrono::high_resolution_clock::now(); - - for (size_t gate_rep = 0; gate_rep < num_gate_reps; gate_rep++) { - svdat.applyOperation(kernel, std::string(random_gate_names[gate_rep]), - random_gate_wires[gate_rep], - random_inverses[gate_rep], - random_gate_parameters[gate_rep]); - } - - t_end = std::chrono::high_resolution_clock::now(); - - // Output walltime in csv format (Num Qubits, Time (milliseconds)) - const auto walltime = - 0.001 * ((std::chrono::duration_cast( - t_end - t_start)) - .count()); - std::cout << num_qubits << ", " - << walltime / static_cast(num_gate_reps) << std::endl; - return 0; -} diff --git a/pennylane_lightning/src/examples/benchmark_generator.cpp b/pennylane_lightning/src/examples/benchmark_generator.cpp new file mode 100644 index 0000000000..0753b57e6a --- /dev/null +++ b/pennylane_lightning/src/examples/benchmark_generator.cpp @@ -0,0 +1,202 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "Constant.hpp" +#include "DynamicDispatcher.hpp" +#include "ExampleUtil.hpp" +#include "StateVectorManagedCPU.hpp" + +#ifdef USE_SINGLE_PRECISION +using PrecisionT = float; +#pragma message "Using single precision" +#else +using PrecisionT = double; +#endif + +using namespace Pennylane; +using namespace Pennylane::Gates; +using namespace Pennylane::Util; + +auto generatorOp(const std::string_view &name) -> Gates::GeneratorOperation { + auto &dispatcher = DynamicDispatcher::getInstance(); + return dispatcher.strToGeneratorOp(std::string(name)); +} + +struct GeneratorDesc { + std::string name; + std::vector wires; + bool inverse; + + template + GeneratorDesc(Arg0 &&arg0, Arg1 &&arg1, Arg2 &&arg2) + : name{std::forward(arg0)}, wires{std::forward(arg1)}, + inverse{std::forward(arg2)} {} +}; + +std::ostream &operator<<(std::ostream &os, GeneratorDesc &desc) { + os << desc.name 
<< ", " << desc.wires << "," << desc.inverse << std::endl; + return os; +} + +template +auto generateGeneratorSequence(RandomEngine &re, + const GeneratorOperation gntr_op, + const size_t num_reps, const size_t num_qubits, + const size_t num_wires_for_multi_qubit) + -> std::vector { + namespace Constant = Gates::Constant; + using Gates::GeneratorOperation; + + const auto gntr_name = + Util::lookup(Constant::generator_names, gntr_op).substr(9); + + const size_t num_wires = [=]() { + if (Util::array_has_elt(Constant::multi_qubit_generators, gntr_op)) { + // if multi qubit gate + return num_wires_for_multi_qubit; + } + return Util::lookup(Constant::generator_wires, gntr_op); + }(); + + std::vector gntr_seq; + std::uniform_int_distribution inverse_dist(0, 1); + + for (uint32_t k = 0; k < num_reps; k++) { + + bool inverse = static_cast(inverse_dist(re)); + auto wires = generateNeighboringWires(re, num_qubits, num_wires); + + gntr_seq.emplace_back(gntr_name, std::move(wires), inverse); + } + return gntr_seq; +} + +double benchmarkGenerator(KernelType kernel, const size_t num_qubits, + const std::vector &gntr_seq) { + // Run benchmark. Total num_reps number of gates is used. 
+ StateVectorManagedCPU svdat{num_qubits}; + + std::chrono::time_point t_start = + std::chrono::high_resolution_clock::now(); + for (const auto &gntr : gntr_seq) { + [[maybe_unused]] PrecisionT scale = + svdat.applyGenerator(kernel, gntr.name, gntr.wires, gntr.inverse); + } + std::chrono::time_point t_end = + std::chrono::high_resolution_clock::now(); + + return std::chrono::duration(t_end - t_start).count(); +} + +template +double runBenchmarkGenerator(RandomEngine &re, KernelType kernel, + const GeneratorOperation gntr_op, size_t num_reps, + size_t num_qubits, + size_t num_wires_for_multi_qubit) { + auto gntr_seq = generateGeneratorSequence(re, gntr_op, num_reps, num_qubits, + num_wires_for_multi_qubit); + + // Log generated sequence if LOG is turned on + const char *env_p = std::getenv("LOG"); + try { + if (env_p != nullptr && std::stoi(env_p) != 0) { + for (const auto &gntr : gntr_seq) { + std::cerr << gntr.name << ", " << gntr.wires << "," + << gntr.inverse << std::endl; + } + } + } catch (std::exception &e) { + // Just do not print log + } + + return benchmarkGenerator(kernel, num_qubits, gntr_seq); +} + +/** + * @brief Benchmark Pennylane-Lightning for a given generator + * + * @param argc Number of arguments + * @param argv Command line arguments + * @return Returns 0 is completed successfully + */ +int main(int argc, char *argv[]) { + namespace Constant = Gates::Constant; + // Handle input + if (argc != 5 && argc != 6) { // NOLINT(readability-magic-numbers) + std::cerr + << "Wrong number of inputs. User provided " << argc - 1 + << " inputs. 
\n" + << "Usage: " + std::string(argv[0]) + + " num_reps num_qubits kernel [generator|gate] [num_wires]\n" + "Examples: \n" + << "\t" << argv[0] << " 1000 10 PI GeneratorCRX\n" + << "\t" << argv[0] << " 1000 10 LM CRX\n" + << "\t" << argv[0] << " 1000 10 LM MutliRZ 3\n"; + return -1; + } + + size_t num_reps; + size_t num_qubits; + + try { + num_reps = std::stoi(argv[1]); + num_qubits = std::stoi(argv[2]); + } catch (std::exception &e) { + std::cerr << "Arguments num_reps and num_qubits must be integers." + << std::endl; + return -1; + } + + std::string_view kernel_name = argv[3]; + KernelType kernel = string_to_kernel(kernel_name); + if (kernel == KernelType::None) { + std::cerr << "Kernel " << kernel_name << " is unknown." << std::endl; + return 1; + } + + std::string_view gntr_name = argv[4]; + Gates::GeneratorOperation gntr_op; + + try { + gntr_op = generatorOp(gntr_name); + } catch (std::exception &e) { + std::cout << "Unknown generator " + std::string(gntr_name) + " provided" + << std::endl; + return 1; + } + + size_t num_wires_for_multi_qubit = 0; + if (Util::array_has_elt(Constant::multi_qubit_generators, gntr_op)) { + // User provided a multi-qubit gates + if (argc != 6) { + std::cerr << "One should provide the number of wires when using " + "multi qubit generators." 
+ << std::endl; + return 1; + } + + try { + num_wires_for_multi_qubit = std::stoi(argv[5]); + } catch (std::exception &e) { + std::cerr << "Number of wires must be an integer" << std::endl; + return 1; + } + } + + std::random_device rd; + std::mt19937 re(rd()); + + double walltime = runBenchmarkGenerator( + re, kernel, gntr_op, num_reps, num_qubits, num_wires_for_multi_qubit); + + // Output walltime in csv format (Num Qubits, Time (milliseconds)) + std::cout << num_qubits << ", " << walltime / static_cast(num_reps) + << std::endl; + return 0; +} diff --git a/pennylane_lightning/src/examples/benchmark_matrix.cpp b/pennylane_lightning/src/examples/benchmark_matrix.cpp new file mode 100644 index 0000000000..9d297db91c --- /dev/null +++ b/pennylane_lightning/src/examples/benchmark_matrix.cpp @@ -0,0 +1,144 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "Constant.hpp" +#include "ExampleUtil.hpp" +#include "LinearAlgebra.hpp" +#include "StateVectorManagedCPU.hpp" + +#ifdef USE_SINGLE_PRECISION +using PrecisionT = float; +#pragma message "Using single precision" +#else +using PrecisionT = double; +#endif + +using namespace Pennylane; +using namespace Pennylane::Gates; +using namespace Pennylane::Util; + +struct MatOpDesc { + std::vector wires; + bool inverse; + std::vector> mat; + + template + MatOpDesc(Arg0 &&arg0, Arg1 &&arg1, Arg2 &&arg2) + : wires{std::forward(arg0)}, inverse{std::forward(arg1)}, + mat{std::forward(arg2)} {} +}; + +template +auto generateMatrixSequence(RandomEngine &re, const size_t num_reps, + const size_t num_qubits, const size_t num_wires) + -> std::vector { + + std::vector matrix_seq; + matrix_seq.reserve(num_reps); + std::uniform_int_distribution inverse_dist(0, 1); + for (uint32_t k = 0; k < num_reps; k++) { + bool inverse = static_cast(inverse_dist(re)); + auto wires = generateNeighboringWires(re, num_qubits, num_wires); + + matrix_seq.emplace_back(std::move(wires), inverse, + 
Util::randomUnitary(re, num_wires)); + } + return matrix_seq; +} + +double benchmarkMatrix(KernelType kernel, const size_t num_qubits, + const std::vector &mat_seq) { + // Run benchmark. Total num_reps number of gates is used. + StateVectorManagedCPU svdat{num_qubits}; + + std::chrono::time_point t_start = + std::chrono::high_resolution_clock::now(); + for (const auto &mat_desc : mat_seq) { + svdat.applyMatrix(kernel, mat_desc.mat.data(), mat_desc.wires, + mat_desc.inverse); + } + std::chrono::time_point t_end = + std::chrono::high_resolution_clock::now(); + + return std::chrono::duration(t_end - t_start).count(); +} + +template +double runBenchmarkMatrix(RandomEngine &re, KernelType kernel, size_t num_reps, + size_t num_qubits, size_t num_wires) { + auto mat_seq = generateMatrixSequence(re, num_reps, num_qubits, num_wires); + + // Log generated sequence if LOG is turned on + const char *env_p = std::getenv("LOG"); + try { + if (env_p != nullptr && std::stoi(env_p) != 0) { + for (const auto &mat_desc : mat_seq) { + std::cerr << mat_desc.wires << ", " << mat_desc.inverse << ", " + << mat_desc.mat << std::endl; + } + } + } catch (std::exception &e) { + // Just do not print log + } + + return benchmarkMatrix(kernel, num_qubits, mat_seq); +} + +/** + * @brief Benchmark Pennylane-Lightning for a given generator + * + * @param argc Number of arguments + * @param argv Command line arguments + * @return Returns 0 is completed successfully + */ +int main(int argc, char *argv[]) { + namespace Constant = Gates::Constant; + // Handle input + if (argc != 5) { // NOLINT(readability-magic-numbers) + std::cerr << "Wrong number of inputs. User provided " << argc - 1 + << " inputs. 
\n" + << "Usage: " + std::string(argv[0]) + + " num_reps num_qubits kernel num_wires\n" + "Examples: \n" + << "\t" << argv[0] << " 1000 10 PI 4\n"; + return -1; + } + + size_t num_reps; + size_t num_qubits; + size_t num_wires; + + try { + num_reps = std::stoi(argv[1]); + num_qubits = std::stoi(argv[2]); + num_wires = std::stoi(argv[4]); + } catch (std::exception &e) { + std::cerr << "Arguments num_reps and num_qubits must be integers." + << std::endl; + return -1; + } + + std::string_view kernel_name = argv[3]; + KernelType kernel = string_to_kernel(kernel_name); + if (kernel == KernelType::None) { + std::cerr << "Kernel " << kernel_name << " is unknown." << std::endl; + return 1; + } + + std::random_device rd; + std::mt19937 re(rd()); + + double walltime = + runBenchmarkMatrix(re, kernel, num_reps, num_qubits, num_wires); + + // Output walltime in csv format (Num Qubits, Time (milliseconds)) + std::cout << num_qubits << ", " << walltime / static_cast(num_reps) + << std::endl; + return 0; +} diff --git a/pennylane_lightning/src/examples/benchmark_multi_rz.cpp b/pennylane_lightning/src/examples/benchmark_multi_rz.cpp deleted file mode 100644 index 49bac2ead2..0000000000 --- a/pennylane_lightning/src/examples/benchmark_multi_rz.cpp +++ /dev/null @@ -1,77 +0,0 @@ -#include "ExampleUtil.hpp" -#include "StateVectorCPU.hpp" - -#include -#include -#include -#include - -using namespace Pennylane; -using namespace Pennylane::Gates; - -constexpr uint32_t seed = 1337; - -int main(int argc, char *argv[]) { - using TestType = double; - - if (argc != 5) { // NOLINT(readability-magic-numbers) - std::cout << "Usage: " << argv[0] - << " num_gate_reps num_qubits num_wires kernel" << std::endl; - return 1; - } - - size_t num_gate_reps; - size_t num_qubits; - size_t num_wires; - - try { - num_gate_reps = std::stoi(argv[1]); - num_qubits = std::stoi(argv[2]); - num_wires = std::stoi(argv[3]); - } catch (std::exception &e) { - std::cerr << "Arguments must be integers." 
<< std::endl; - return 1; - } - - std::string_view kernel_name = argv[4]; - KernelType kernel = string_to_kernel(kernel_name); - if (kernel == KernelType::None) { - std::cerr << "Kernel " << kernel_name << " is unknown." << std::endl; - return 1; - } - - std::mt19937 re{seed}; // NOLINT(readability-magic-number) - std::uniform_real_distribution param_dist(-M_PI, M_PI); - - std::vector> wires; - std::vector params; - - wires.reserve(num_gate_reps); - params.reserve(num_gate_reps); - - for (size_t gate_rep = 0; gate_rep < num_gate_reps; gate_rep++) { - wires.emplace_back(generateDistinctWires(re, num_qubits, num_wires)); - params.emplace_back(param_dist(re)); - } - - StateVectorCPU sv{num_qubits}; - - std::chrono::time_point t_start = - std::chrono::high_resolution_clock::now(); - - for (size_t gate_rep = 0; gate_rep < num_gate_reps; gate_rep++) { - sv.applyOperation(kernel, "MultiRZ", wires[gate_rep], false, - {params[gate_rep]}); - } - - std::chrono::time_point t_end = - std::chrono::high_resolution_clock::now(); - const auto walltime = - 0.001 * ((std::chrono::duration_cast( - t_end - t_start)) - .count()); - std::cout << num_qubits << ", " - << walltime / static_cast(num_gate_reps) << std::endl; - - return 0; -} diff --git a/pennylane_lightning/src/examples/benchmark_operation.cpp b/pennylane_lightning/src/examples/benchmark_operation.cpp deleted file mode 100644 index 0978a90550..0000000000 --- a/pennylane_lightning/src/examples/benchmark_operation.cpp +++ /dev/null @@ -1,210 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - -#include "Constant.hpp" -#include "ExampleUtil.hpp" -#include "StateVectorCPU.hpp" - -#ifdef USE_SINGLE_PRECISION -using PrecisionT = float; -#pragma message "Using single precision" -#else -using PrecisionT = double; -#endif - -using namespace Pennylane; -using namespace Pennylane::Gates; -using namespace Pennylane::Util; - -std::string_view strip(std::string_view str) { - auto start = 
str.find_first_not_of(" \t"); - auto end = str.find_last_not_of(" \t"); - return str.substr(start, end - start + 1); -} - -template -double benchmark_gate(RandomEngine &re, KernelType kernel, - const std::string &gate_name, const size_t num_reps, - const size_t num_qubits) { - const GateOperation gate_op = Util::lookup( - Util::reverse_pairs(Constant::gate_names), std::string_view(gate_name)); - const size_t num_wires = Util::lookup(Constant::gate_wires, gate_op); - const size_t num_params = Util::lookup(Constant::gate_num_params, gate_op); - - // Generate random generator sequences - std::vector> random_wires; - std::vector random_inverses; - std::vector> random_params; - random_wires.reserve(num_reps); - random_inverses.reserve(num_reps); - random_params.reserve(num_reps); - - std::uniform_int_distribution inverse_dist(0, 1); - std::uniform_real_distribution param_dist(0.0, 2 * M_PI); - - for (uint32_t k = 0; k < num_reps; k++) { - std::vector gate_params; - gate_params.reserve(num_params); - - random_inverses.emplace_back(static_cast(inverse_dist(re))); - random_wires.emplace_back( - generateNeighboringWires(re, num_qubits, num_wires)); - - for (size_t idx = 0; idx < num_params; idx++) { - gate_params.emplace_back(param_dist(re)); - } - random_params.emplace_back(std::move(gate_params)); - } - - // Log generated sequence if LOG is turned on - const char *env_p = std::getenv("LOG"); - try { - if (env_p != nullptr && std::stoi(env_p) != 0) { - for (size_t gate_rep = 0; gate_rep < num_reps; gate_rep++) { - std::cerr << gate_name << ", " << random_wires[gate_rep] << "," - << random_inverses[gate_rep] << "," - << random_params[gate_rep] << std::endl; - } - } - } catch (std::exception &e) { - // Just do not print log - } - - // Run benchmark. Total num_reps number of gates is used. 
- StateVectorCPU svdat{num_qubits}; - - std::chrono::time_point t_start = - std::chrono::high_resolution_clock::now(); - for (size_t gate_rep = 0; gate_rep < num_reps; gate_rep++) { - svdat.applyOperation(kernel, gate_name, random_wires[gate_rep], - random_inverses[gate_rep], - random_params[gate_rep]); - } - std::chrono::time_point t_end = - std::chrono::high_resolution_clock::now(); - - return std::chrono::duration(t_end - t_start).count(); -} - -template -double benchmark_generator(RandomEngine &re, KernelType kernel, - const std::string &gntr_name, const size_t num_reps, - const size_t num_qubits) { - const auto gntr_name_without_prefix = gntr_name.substr(9); - const GeneratorOperation gntr_op = - Util::lookup(Util::reverse_pairs(Constant::generator_names), - std::string_view(gntr_name)); - const size_t num_wires = Util::lookup(Constant::generator_wires, gntr_op); - - // Generate random generator sequences - std::vector> random_wires; - std::vector random_inverses; - random_wires.reserve(num_reps); - random_inverses.reserve(num_reps); - - std::uniform_int_distribution inverse_dist(0, 1); - - for (uint32_t k = 0; k < num_reps; k++) { - random_inverses.emplace_back(static_cast(inverse_dist(re))); - random_wires.emplace_back( - generateNeighboringWires(re, num_qubits, num_wires)); - } - - // Log generated sequence if LOG is turned on - const char *env_p = std::getenv("LOG"); - try { - if (env_p != nullptr && std::stoi(env_p) != 0) { - for (size_t gate_rep = 0; gate_rep < num_reps; gate_rep++) { - std::cerr << gntr_name << ", " << random_wires[gate_rep] << "," - << random_inverses[gate_rep] << std::endl; - } - } - } catch (std::exception &e) { - // Just do not print log - } - - // Run benchmark. Total num_reps number of gates is used. 
- StateVectorCPU svdat{num_qubits}; - - std::chrono::time_point t_start = - std::chrono::high_resolution_clock::now(); - for (size_t gate_rep = 0; gate_rep < num_reps; gate_rep++) { - [[maybe_unused]] auto scale = svdat.applyGenerator( - kernel, gntr_name_without_prefix, random_wires[gate_rep], - random_inverses[gate_rep]); - } - std::chrono::time_point t_end = - std::chrono::high_resolution_clock::now(); - - return std::chrono::duration(t_end - t_start).count(); -} - -/** - * @brief Benchmark Pennylane-Lightning for a given generator - * - * @param argc Number of arguments - * @param argv Command line arguments - * @return Returns 0 is completed successfully - */ -int main(int argc, char *argv[]) { - // Handle input - if (argc < 5) { // NOLINT(readability-magic-numbers) - std::cerr << "Wrong number of inputs. User provided " << argc - 1 - << " inputs. \n" - << "Usage: " + std::string(argv[0]) + - " num_reps num_qubits kernel [generator|gate]\n" - "Examples: \n" - "\t" - << argv[0] << " 1000 10 PI GeneratorCRX\n" - << "\t" << argv[0] << " 1000 10 LM CRX" - << std::endl; // Change to std::format in C++20 - return -1; - } - - size_t num_reps; - size_t num_qubits; - - try { - num_reps = std::stoi(argv[1]); - num_qubits = std::stoi(argv[2]); - } catch (std::exception &e) { - std::cerr << "Arguments num_reps and num_qubits must be integers." - << std::endl; - return -1; - } - - std::string_view kernel_name = argv[3]; - KernelType kernel = string_to_kernel(kernel_name); - if (kernel == KernelType::None) { - std::cerr << "Kernel " << kernel_name << " is unknown." 
<< std::endl; - return 1; - } - - const std::string_view gate_or_gntr_name = argv[4]; - const std::string_view generator_prefix = "Generator"; - - std::random_device rd; - std::mt19937 re(rd()); - - double walltime; - - if (gate_or_gntr_name.substr(0, generator_prefix.length()) == - generator_prefix) { // generators - walltime = benchmark_generator( - re, kernel, std::string(gate_or_gntr_name), num_reps, num_qubits); - } else { - walltime = benchmark_gate(re, kernel, std::string(gate_or_gntr_name), - num_reps, num_qubits); - } - - // Output walltime in csv format (Num Qubits, Time (milliseconds)) - std::cout << num_qubits << ", " << walltime / static_cast(num_reps) - << std::endl; - return 0; -} diff --git a/pennylane_lightning/src/examples/gate_benchmark_oplist.cpp b/pennylane_lightning/src/examples/gate_benchmark_oplist.cpp deleted file mode 100644 index 3ec93a272b..0000000000 --- a/pennylane_lightning/src/examples/gate_benchmark_oplist.cpp +++ /dev/null @@ -1,227 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - -#include "Constant.hpp" -#include "ExampleUtil.hpp" -#include "StateVectorManaged.hpp" - -using namespace Pennylane; -using namespace Pennylane::Gates; -using namespace Pennylane::Util; - -std::string_view strip(std::string_view str) { - auto start = str.find_first_not_of(" \t"); - auto end = str.find_last_not_of(" \t"); - return str.substr(start, end - start + 1); -} - -struct GateDesc { - size_t n_wires; // number of wires the gate applies to - size_t n_params; // number of parameters the gate requires -}; - -std::vector> -parseGateLists(std::string_view arg) { - namespace Constant = Gates::Constant; - std::map available_gates_wires; - - for (const auto &[gate_op, gate_name] : Constant::gate_names) { - if (!array_has_elt(Constant::multi_qubit_gates, gate_op)) { - // We do not support multi qubit gates yet - size_t n_wires = Util::lookup(Constant::gate_wires, gate_op); - size_t n_params = 
Util::lookup(Constant::gate_num_params, gate_op); - available_gates_wires.emplace(gate_name, - GateDesc{n_wires, n_params}); - } - } - - if (arg.empty()) { - /* - return std::vector>( - available_gates_wires.begin(), available_gates_wires.end()); - */ - return {}; - } - - std::vector> ops; - - if (auto pos = arg.find_first_of('['); pos != std::string_view::npos) { - // arg is a list "[...]" - auto start = pos + 1; - auto end = arg.find_last_of(']'); - if (end == std::string_view::npos) { - throw std::invalid_argument( - "Argument must contain operators within square brackets []."); - } - arg = arg.substr(start, end - start); - } - - size_t start; - size_t end = 0; - while ((start = arg.find_first_not_of(',', end)) != std::string::npos) { - end = arg.find(',', start); - auto op_name = strip(arg.substr(start, end - start)); - - auto iter = available_gates_wires.find(std::string(op_name)); - - if (iter == available_gates_wires.end()) { - std::ostringstream ss; - ss << "Given gate " << op_name - << " is not availabe"; // TODO: Change to std::format in C++20 - throw std::invalid_argument(ss.str()); - } - ops.emplace_back(*iter); - } - return ops; -} - -/** - * @brief Benchmark Pennylane-Lightning for a given gate set - * - * Example usage: - * - * $ gate_benchmark_oplist 10 22 # Benchmark using 10 random gates (sampled - * evenly from all possible gates) for 22 qubits - * $ gate_benchmark_oplist 100 20 [PauliX, CNOT] # Benchmark using 100 - * random gates (where each gate is PauliX or CNOT) for 20 qubits - * - * The whole supported gates are PauliX, PauliY, PauliZ, Hadamard, S, T, RX, RY, - * RZ, Rot, PhaseShift, CNOT, SWAP, ControlledPhaseShift, CRX, CRY, CRZ, CRot, - * Toffoli and CSWAP. - * - * @param argc Number of arguments - * @param argv Command line arguments - * @return Returns 0 is completed successfully - */ -int main(int argc, char *argv[]) { - using TestType = double; - - // Handle input - if (argc < 4) { - std::cerr << "Wrong number of inputs. 
User provided " << argc - 1 - << " inputs. " - << "Usage: " + std::string(argv[0]) + - " num_gate_reps num_qubits kernel [gate_lists]\n" - "\tExample: " - << argv[0] << " 1000 10 PI [PauliX, CNOT]" - << std::endl; // Change to std::format in C++20 - return -1; - } - - size_t num_gate_reps; - size_t num_qubits; - - try { - num_gate_reps = std::stoi(argv[1]); - num_qubits = std::stoi(argv[2]); - } catch (std::exception &e) { - std::cerr << "Arguments num_gate_reps and num_qubits must be integers." - << std::endl; - return -1; - } - - std::string_view kernel_name = argv[3]; - KernelType kernel = string_to_kernel(kernel_name); - if (kernel == KernelType::None) { - std::cerr << "Kernel " << kernel_name << " is unknown." << std::endl; - return 1; - } - - // Gate list is provided - std::string op_list_s; - { - std::ostringstream ss; - for (int idx = 4; idx < argc; idx++) { - ss << argv[idx] << " "; - } - op_list_s = ss.str(); - } - - std::vector> op_list; - try { - op_list = parseGateLists(op_list_s); - } catch (std::exception &e) { - std::cerr << e.what() << std::endl; - return 1; - } - - if (op_list.empty()) { - std::cerr << "Please provide a gate list." 
<< std::endl; - return 1; - } - - // Generate random gate sequences - std::random_device rd; - std::mt19937 re(rd()); - - std::vector random_gate_names; - std::vector> random_gate_wires; - std::vector random_inverses; - std::vector> random_gate_parameters; - - std::uniform_int_distribution gate_dist(0, op_list.size() - 1); - std::uniform_int_distribution inverse_dist(0, 1); - std::uniform_real_distribution param_dist(0.0, 2 * M_PI); - std::uniform_int_distribution wire_dist(0, num_qubits - 1); - - auto gen_param = [¶m_dist, &re]() { return param_dist(re); }; - - for (uint32_t k = 0; k < num_gate_reps; k++) { - const auto &[op_name, gate_desc] = op_list[gate_dist(re)]; - - std::vector gate_params(gate_desc.n_params, 0.0); - std::generate(gate_params.begin(), gate_params.end(), gen_param); - - random_gate_names.emplace_back(op_name); - random_inverses.emplace_back(static_cast(inverse_dist(re))); - // random_gate_wires.emplace_back(generateDistinctWires(re, num_qubits, - // gate_desc.n_wires)); - random_gate_wires.emplace_back( - generateNeighboringWires(re, num_qubits, gate_desc.n_wires)); - random_gate_parameters.emplace_back(std::move(gate_params)); - } - - // Log generated sequence if LOG is turned on - const char *env_p = std::getenv("LOG"); - try { - if (env_p != nullptr && std::stoi(env_p) != 0) { - for (size_t gate_rep = 0; gate_rep < num_gate_reps; gate_rep++) { - std::cerr << random_gate_names[gate_rep] << ", " - << random_gate_wires[gate_rep] << ", " - << random_gate_parameters[gate_rep] << std::endl; - } - } - } catch (std::exception &e) { - // Just do not print log - } - - // Run benchmark. Total num_gate_reps number of gates is used. 
- Pennylane::StateVectorManaged svdat{num_qubits}; - std::chrono::time_point t_start; - std::chrono::time_point t_end; - t_start = std::chrono::high_resolution_clock::now(); - - for (size_t gate_rep = 0; gate_rep < num_gate_reps; gate_rep++) { - svdat.applyOperation(kernel, std::string(random_gate_names[gate_rep]), - random_gate_wires[gate_rep], - random_inverses[gate_rep], - random_gate_parameters[gate_rep]); - } - - t_end = std::chrono::high_resolution_clock::now(); - - // Output walltime in csv format (Num Qubits, Time (milliseconds)) - const auto walltime = - 0.001 * ((std::chrono::duration_cast( - t_end - t_start)) - .count()); - std::cout << num_qubits << ", " - << walltime / static_cast(num_gate_reps) << std::endl; - return 0; -} diff --git a/pennylane_lightning/src/examples/plot_gate_benchmark.py b/pennylane_lightning/src/examples/plot_benchmark.py similarity index 100% rename from pennylane_lightning/src/examples/plot_gate_benchmark.py rename to pennylane_lightning/src/examples/plot_benchmark.py diff --git a/pennylane_lightning/src/examples/run_benchmark.sh b/pennylane_lightning/src/examples/run_benchmark.sh new file mode 100755 index 0000000000..e8f7daf657 --- /dev/null +++ b/pennylane_lightning/src/examples/run_benchmark.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +currdir=$(pwd) + +if [ "$#" -lt 2 ]; then + echo "Usage: $0 Kernel Gate [Number of wires (for MultiRZ)]" + exit 1 +fi + +# Parameter initialization +min_num_qubits=8 +max_num_qubits=24 +num_qubits_increment=2 +num_gate_reps=1000 +kernel="$1" +gate="$2" +path_to_binary="./benchmark_gate" + +compiler_info=$( $path_to_csv + +if [[ "$#" -eq 3 ]]; then + command_format="$path_to_binary ${num_gate_reps} %d ${kernel} ${gate} $3" +else + command_format="$path_to_binary ${num_gate_reps} %d ${kernel} ${gate}" +fi + +# Generate data +for ((num_qubits=$min_num_qubits; num_qubits<$max_num_qubits+1; num_qubits+=$num_qubits_increment)); do + echo "Gate repetition=$num_gate_reps, num_qubits=$num_qubits, 
kernel=$kernel, gate=$gate" + command=$(printf "$command_format" "$num_qubits") + $command >> $path_to_csv +done diff --git a/pennylane_lightning/src/examples/run_gate_benchmark.sh b/pennylane_lightning/src/examples/run_gate_benchmark.sh deleted file mode 100755 index 315c3ebdda..0000000000 --- a/pennylane_lightning/src/examples/run_gate_benchmark.sh +++ /dev/null @@ -1,55 +0,0 @@ -#!/bin/bash - -currdir=$(pwd) - -if [ "$#" -lt 2 ]; then - echo "Usage: $0 Kernel Gate [Number of wires (for MultiRZ)]" - exit 1 -fi - -# Parameter initialization -min_num_qubits=8 -max_num_qubits=24 -num_qubits_increment=2 -num_gate_reps=1000 -kernel="$1" -gate="$2" - -compiler_info=$( $path_to_csv - - # Generate data - for ((num_qubits=$min_num_qubits; num_qubits<$max_num_qubits+1; num_qubits+=$num_qubits_increment)); do - echo "Gate repetition=$num_gate_reps, num_qubits=$num_qubits, kernel=$kernel, gate=$gate" - $path_to_binary ${num_gate_reps} ${num_qubits} ${kernel} ${gate} >> $path_to_csv - done -else - num_wires="$3" - # Creating data file - binary_name="./benchmark_multi_rz" - path_to_binary="$currdir/$binary_name" - - resdir="$currdir/res_${compiler_info}" - mkdir -p $resdir - data_file_name="benchmark_${kernel}_${gate}_${num_wires}.csv" - path_to_csv="$resdir/$data_file_name" - echo "Creating $path_to_csv" - echo "Num Qubits, Time (milliseconds)" > $path_to_csv - - # Generate data - for ((num_qubits=$min_num_qubits; num_qubits<$max_num_qubits+1; num_qubits+=$num_qubits_increment)); do - echo "Gate repetition=$num_gate_reps, num_qubits=$num_qubits, kernel=$kernel, gate=$gate" - $path_to_binary ${num_gate_reps} ${num_qubits} ${num_wires} ${kernel} >> $path_to_csv - done -fi diff --git a/pennylane_lightning/src/examples/test.sh b/pennylane_lightning/src/examples/test.sh new file mode 100644 index 0000000000..bb03f7a040 --- /dev/null +++ b/pennylane_lightning/src/examples/test.sh @@ -0,0 +1 @@ +echo $# diff --git a/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsLM.hpp 
b/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsLM.hpp index 5618bffc7b..2a0e75c529 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsLM.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsLM.hpp @@ -256,28 +256,56 @@ class GateImplementationsLM : public PauliGenerator { size_t dim = 1U << wires.size(); std::vector indices; indices.resize(dim); + std::vector> coeffs_in(dim, 0.0); - for (size_t k = 0; k < Util::exp2(num_qubits); k += dim) { - std::vector> coeffs_in(dim); - std::vector> coeffs_out(dim); + if (inverse) { + for (size_t k = 0; k < Util::exp2(num_qubits); k += dim) { + + for (size_t inner_idx = 0; inner_idx < dim; inner_idx++) { + size_t idx = k | inner_idx; + size_t n_wires = wires.size(); + for (size_t pos = 0; pos < n_wires; pos++) { + idx = bitswap(idx, n_wires - pos - 1, + num_qubits - wires[pos] - 1); + } + indices[inner_idx] = idx; + coeffs_in[inner_idx] = arr[idx]; + } + + for (size_t i = 0; i < dim; i++) { + const auto idx = indices[i]; + arr[idx] = 0.0; - for (size_t inner_idx = 0; inner_idx < dim; inner_idx++) { - size_t idx = k | inner_idx; - size_t n_wires = wires.size(); - for (size_t pos = 0; pos < n_wires; pos++) { - idx = bitswap(idx, n_wires - pos - 1, - num_qubits - wires[pos] - 1); + for (size_t j = 0; j < dim; j++) { + const size_t base_idx = j * dim; + arr[idx] += + std::conj(matrix[base_idx + i]) * coeffs_in[j]; + } } - indices[inner_idx] = idx; - coeffs_in[inner_idx] = arr[idx]; } + } else { + for (size_t k = 0; k < Util::exp2(num_qubits); k += dim) { + + for (size_t inner_idx = 0; inner_idx < dim; inner_idx++) { + size_t idx = k | inner_idx; + size_t n_wires = wires.size(); + for (size_t pos = 0; pos < n_wires; pos++) { + idx = bitswap(idx, n_wires - pos - 1, + num_qubits - wires[pos] - 1); + } + indices[inner_idx] = idx; + coeffs_in[inner_idx] = arr[idx]; + } - Util::matrixVecProd(matrix, coeffs_in.data(), coeffs_out.data(), - dim, dim, - inverse ? 
Trans::Adjoint : Trans::NoTranspose); + for (size_t i = 0; i < dim; i++) { + const auto idx = indices[i]; + arr[idx] = 0.0; + const size_t base_idx = i * dim; - for (size_t inner_idx = 0; inner_idx < dim; inner_idx++) { - arr[indices[inner_idx]] = coeffs_out[inner_idx]; + for (size_t j = 0; j < dim; j++) { + arr[idx] += matrix[base_idx + j] * coeffs_in[j]; + } + } } } } diff --git a/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsPI.hpp b/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsPI.hpp index b4314411f5..c2558b5021 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsPI.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsPI.hpp @@ -206,23 +206,23 @@ class GateImplementationsPI : public PauliGenerator { const std::complex v10 = shiftedState[indices[2]]; const std::complex v11 = shiftedState[indices[3]]; - // NOLINTNEXTLINE(readability-magic-numbers) shiftedState[indices[0]] = + // NOLINTNEXTLINE(readability-magic-numbers) matrix[0b0000] * v00 + matrix[0b0001] * v01 + // NOLINTNEXTLINE(readability-magic-numbers) matrix[0b0010] * v10 + matrix[0b0011] * v11; - // NOLINTNEXTLINE(readability-magic-numbers) shiftedState[indices[1]] = + // NOLINTNEXTLINE(readability-magic-numbers) matrix[0b0100] * v00 + matrix[0b0101] * v01 + // NOLINTNEXTLINE(readability-magic-numbers) matrix[0b0110] * v10 + matrix[0b0111] * v11; - // NOLINTNEXTLINE(readability-magic-numbers) shiftedState[indices[2]] = + // NOLINTNEXTLINE(readability-magic-numbers) matrix[0b1000] * v00 + matrix[0b1001] * v01 + // NOLINTNEXTLINE(readability-magic-numbers) matrix[0b1010] * v10 + matrix[0b1011] * v11; - // NOLINTNEXTLINE(readability-magic-numbers) shiftedState[indices[3]] = + // NOLINTNEXTLINE(readability-magic-numbers) matrix[0b1100] * v00 + matrix[0b1101] * v01 + // NOLINTNEXTLINE(readability-magic-numbers) matrix[0b1110] * v10 + matrix[0b1111] * v11; diff --git a/pennylane_lightning/src/simulator/DynamicDispatcher.hpp 
b/pennylane_lightning/src/simulator/DynamicDispatcher.hpp index 46fc68ab81..e761cdeca4 100644 --- a/pennylane_lightning/src/simulator/DynamicDispatcher.hpp +++ b/pennylane_lightning/src/simulator/DynamicDispatcher.hpp @@ -295,16 +295,6 @@ template class DynamicDispatcher { const std::vector &wires, bool inverse) const { assert(num_qubits >= wires.size()); - switch (mat_op) { - case Gates::MatrixOperation::SingleQubitOp: - assert(wires.size() == 1); - break; - case Gates::MatrixOperation::TwoQubitOp: - assert(wires.size() == 2); - break; - default: - break; - } const auto iter = matrices_.find(std::make_pair(mat_op, kernel)); if (iter == matrices_.end()) { throw std::invalid_argument( diff --git a/pennylane_lightning/src/simulator/StateVectorBase.hpp b/pennylane_lightning/src/simulator/StateVectorBase.hpp index 9861ec0c39..4853754815 100644 --- a/pennylane_lightning/src/simulator/StateVectorBase.hpp +++ b/pennylane_lightning/src/simulator/StateVectorBase.hpp @@ -295,6 +295,110 @@ template class StateVectorBase { num_qubits_, opName, wires, adj); } + /** + * @brief Apply a general single qubit matrix to given wires. + * + * @param kernel Kernel to run the operation + * @param matrix Pointer to the array data. + * @param wires Wires to apply gate to. + * @param inverse Indicate whether inverse should be taken. + */ + inline void applySingleQubitOp(Gates::KernelType kernel, + const ComplexPrecisionT *matrix, + const std::vector &wires, + bool inverse = false) { + using Gates::MatrixOperation; + + assert(wires.size() == 1); + + auto &dispatcher = DynamicDispatcher::getInstance(); + auto *arr = getData(); + dispatcher.applyMatrix(kernel, arr, MatrixOperation::SingleQubitOp, + num_qubits_, matrix, wires, inverse); + } + + /** + * @brief Apply a general single qubit matrix to given wires. + * + * @param kernel Kernel to run the operation + * @param matrix Pointer to the array data. + * @param wires Wires to apply gate to. 
+ * @param inverse Indicate whether inverse should be taken. + */ + inline void applyTwoQubitOp(Gates::KernelType kernel, + const ComplexPrecisionT *matrix, + const std::vector &wires, + bool inverse = false) { + using Gates::MatrixOperation; + + assert(wires.size() == 2); + + auto &dispatcher = DynamicDispatcher::getInstance(); + auto *arr = getData(); + dispatcher.applyMatrix(kernel, arr, MatrixOperation::TwoQubitOp, + num_qubits_, matrix, wires, inverse); + } + + /** + * @brief Apply a general multi qubit matrix to given wires. + * + * @param kernel Kernel to run the operation + * @param matrix Pointer to the array data. + * @param wires Wires to apply gate to. + * @param inverse Indicate whether inverse should be taken. + */ + inline void applyMultiQubitOp(Gates::KernelType kernel, + const ComplexPrecisionT *matrix, + const std::vector &wires, + bool inverse = false) { + using Gates::MatrixOperation; + + auto &dispatcher = DynamicDispatcher::getInstance(); + auto *arr = getData(); + dispatcher.applyMatrix(kernel, arr, MatrixOperation::MultiQubitOp, + num_qubits_, matrix, wires, inverse); + } + + /** + * @brief Apply a given matrix directly to the statevector read directly + * from numpy data. Data can be in 1D or 2D format. + * + * @param kernel Kernel to run the operation + * @param matrix Pointer to the array data. + * @param wires Wires to apply gate to. + * @param inverse Indicate whether inverse should be taken. 
+ */ + inline void applyMatrix(Gates::KernelType kernel, + const ComplexPrecisionT *matrix, + const std::vector &wires, + bool inverse = false) { + using Gates::MatrixOperation; + + auto &dispatcher = DynamicDispatcher::getInstance(); + auto *arr = getData(); + + if (wires.empty()) { + throw std::invalid_argument( + "Number of wires must be larger than 0"); + } + + switch (wires.size()) { + case 1: + dispatcher.applyMatrix(kernel, arr, MatrixOperation::SingleQubitOp, + num_qubits_, matrix, wires, inverse); + return; + case 2: + dispatcher.applyMatrix(kernel, arr, MatrixOperation::TwoQubitOp, + num_qubits_, matrix, wires, inverse); + return; + default: + dispatcher.applyMatrix(kernel, arr, MatrixOperation::MultiQubitOp, + num_qubits_, matrix, wires, inverse); + return; + } + PL_UNREACHABLE; + } + /** * @brief Apply a given matrix directly to the statevector read directly * from numpy data. Data can be in 1D or 2D format. @@ -306,10 +410,7 @@ template class StateVectorBase { inline void applyMatrix(const ComplexPrecisionT *matrix, const std::vector &wires, bool inverse = false) { - namespace Constant = Gates::Constant; using Gates::MatrixOperation; - using Gates::SelectKernel; - using Gates::static_lookup; auto &dispatcher = DynamicDispatcher::getInstance(); auto *arr = getData(); diff --git a/pennylane_lightning/src/tests/TestHelpers.hpp b/pennylane_lightning/src/tests/TestHelpers.hpp index 723d03b10a..dc6056016b 100644 --- a/pennylane_lightning/src/tests/TestHelpers.hpp +++ b/pennylane_lightning/src/tests/TestHelpers.hpp @@ -210,24 +210,14 @@ auto createPlusState(size_t num_qubits) return res; } -/** - * @brief Calculate the squared norm of a vector - */ -template -auto squaredNorm(const std::complex *data, size_t data_size) - -> PrecisionT { - return std::transform_reduce( - data, data + data_size, PrecisionT{}, std::plus(), - static_cast &)>( - &std::norm)); -} - /** * @brief create a random state */ template auto createRandomState(RandomEngine &re, size_t 
num_qubits) -> TestVector> { + using Util::squaredNorm; + TestVector> res(1U << num_qubits, {0.0, 0.0}); std::uniform_real_distribution dist; for (size_t idx = 0; idx < (1U << num_qubits); idx++) { @@ -321,58 +311,6 @@ auto createParams(Gates::GateOperation op) -> std::vector { } return {}; } -/** - * @brief Generate random unitary matrix - * - * @return Generated unitary matrix in row-major format - */ -template -auto randomUnitary(RandomEngine &re, size_t num_qubits) - -> TestVector> { - using ComplexPrecisionT = std::complex; - const size_t dim = (1U << num_qubits); - TestVector res(dim * dim, ComplexPrecisionT{}); - - std::normal_distribution dist; - - auto generator = [&dist, &re]() -> ComplexPrecisionT { - return ComplexPrecisionT{dist(re), dist(re)}; - }; - - std::generate(res.begin(), res.end(), generator); - - // Simple algorithm to make rows orthogonal with Gram-Schmidt - // This algorithm is unstable but works for a small matrix. - // Use QR decomposition when we have LAPACK support. 
- - for (size_t row2 = 0; row2 < dim; row2++) { - ComplexPrecisionT *row2_p = res.data() + row2 * dim; - for (size_t row1 = 0; row1 < row2; row1++) { - const ComplexPrecisionT *row1_p = res.data() + row1 * dim; - ComplexPrecisionT dot12 = Util::innerProdC(row1_p, row2_p, dim); - ComplexPrecisionT dot11 = squaredNorm(row1_p, dim); - - // orthogonalize row2 - std::transform( - row2_p, row2_p + dim, row1_p, row2_p, - [scale = dot12 / dot11](auto &elt2, const auto &elt1) { - return elt2 - scale * elt1; - }); - } - } - - // Normalize each row - for (size_t row = 0; row < dim; row++) { - ComplexPrecisionT *row_p = res.data() + row * dim; - PrecisionT norm2 = std::sqrt(squaredNorm(row_p, dim)); - - // normalize row2 - std::transform(row_p, row_p + dim, row_p, [norm2](const auto c) { - return (static_cast(1.0) / norm2) * c; - }); - } - return res; -} template struct PrecisionToName; diff --git a/pennylane_lightning/src/tests/Test_AdjDiff.cpp b/pennylane_lightning/src/tests/Test_AdjDiff.cpp index 696d66d41d..d1f9e94136 100644 --- a/pennylane_lightning/src/tests/Test_AdjDiff.cpp +++ b/pennylane_lightning/src/tests/Test_AdjDiff.cpp @@ -50,7 +50,7 @@ TEST_CASE("AdjointJacobian::adjointJacobian Op=RX, Obs=Z", for (const auto &p : param) { auto ops = OpsData({"RX"}, {{p}}, {{0}}, {false}); - std::vector> cdata(0b1 << num_qubits); + std::vector> cdata(1U << num_qubits); cdata[0] = std::complex{1, 0}; StateVectorRawCPU psi(cdata.data(), cdata.size()); @@ -82,7 +82,7 @@ TEST_CASE("AdjointJacobian::adjointJacobian Op=RY, Obs=X", for (const auto &p : param) { auto ops = OpsData({"RY"}, {{p}}, {{0}}, {false}); - std::vector> cdata(0b1 << num_qubits); + std::vector> cdata(1U << num_qubits); cdata[0] = std::complex{1, 0}; StateVectorRawCPU psi(cdata.data(), cdata.size()); @@ -109,7 +109,7 @@ TEST_CASE("AdjointJacobian::adjointJacobian Op=RX, Obs=[Z,Z]", const size_t num_obs = 2; std::vector jacobian(num_obs * num_params, 0); - std::vector> cdata(0b1 << num_qubits); + std::vector> 
cdata(1U << num_qubits); StateVectorRawCPU psi(cdata.data(), cdata.size()); cdata[0] = std::complex{1, 0}; @@ -140,7 +140,7 @@ TEST_CASE("AdjointJacobian::adjointJacobian Op=[RX,RX,RX], Obs=[Z,Z,Z]", const size_t num_obs = 3; std::vector jacobian(num_obs * num_params, 0); - std::vector> cdata(0b1 << num_qubits); + std::vector> cdata(1U << num_qubits); StateVectorRawCPU psi(cdata.data(), cdata.size()); cdata[0] = std::complex{1, 0}; @@ -179,7 +179,7 @@ TEST_CASE("AdjointJacobian::adjointJacobian Op=[RX,RX,RX], Obs=[Z,Z,Z], " std::vector jacobian(num_obs * num_params, 0); std::vector t_params{0, 2}; - std::vector> cdata(0b1 << num_qubits); + std::vector> cdata(1U << num_qubits); StateVectorRawCPU psi(cdata.data(), cdata.size()); cdata[0] = std::complex{1, 0}; @@ -214,7 +214,7 @@ TEST_CASE("AdjointJacobian::adjointJacobian Op=[RX,RX,RX], Obs=[ZZZ]", const size_t num_obs = 1; std::vector jacobian(num_obs * num_params, 0); - std::vector> cdata(0b1 << num_qubits); + std::vector> cdata(1U << num_qubits); StateVectorRawCPU psi(cdata.data(), cdata.size()); cdata[0] = std::complex{1, 0}; @@ -249,7 +249,7 @@ TEST_CASE("AdjointJacobian::adjointJacobian Op=Mixed, Obs=[XXX]", const size_t num_obs = 1; std::vector jacobian(num_obs * num_params, 0); - std::vector> cdata(0b1 << num_qubits); + std::vector> cdata(1U << num_qubits); StateVectorRawCPU psi(cdata.data(), cdata.size()); cdata[0] = std::complex{1, 0}; diff --git a/pennylane_lightning/src/tests/Test_GateImplementations_Matrix.cpp b/pennylane_lightning/src/tests/Test_GateImplementations_Matrix.cpp index 72eba17f63..512c33ee57 100644 --- a/pennylane_lightning/src/tests/Test_GateImplementations_Matrix.cpp +++ b/pennylane_lightning/src/tests/Test_GateImplementations_Matrix.cpp @@ -6,6 +6,7 @@ #include using namespace Pennylane; +using Util::randomUnitary; template using ApplyMatrixType = void (*)(std::complex *, size_t, diff --git a/pennylane_lightning/src/tests/Test_Internal.cpp 
b/pennylane_lightning/src/tests/Test_Internal.cpp index 819f472586..b595d5daeb 100644 --- a/pennylane_lightning/src/tests/Test_Internal.cpp +++ b/pennylane_lightning/src/tests/Test_Internal.cpp @@ -85,38 +85,6 @@ TEMPLATE_TEST_CASE("createProductState", "[Test_Internal]", float, double) { } } -/** - * @brief Test randomUnitary is correct - */ -TEMPLATE_TEST_CASE("randomUnitary", "[Test_Internal]", float, double) { - using PrecisionT = TestType; - - std::mt19937 re{1337}; - - for (size_t num_qubits = 1; num_qubits <= 5; num_qubits++) { - const size_t dim = (1U << num_qubits); - const auto unitary = randomUnitary(re, num_qubits); - - auto unitary_dagger = Util::Transpose(unitary, dim, dim); - std::transform( - unitary_dagger.begin(), unitary_dagger.end(), - unitary_dagger.begin(), - [](const std::complex &v) { return std::conj(v); }); - - std::vector> mat(dim * dim); - Util::matrixMatProd(unitary.data(), unitary_dagger.data(), mat.data(), - dim, dim, dim); - - std::vector> identity( - dim * dim, std::complex{}); - for (size_t i = 0; i < dim; i++) { - identity[i * dim + i] = std::complex{1.0, 0.0}; - } - - REQUIRE(mat == PLApprox(identity).margin(1e-5)); - } -} - size_t binomialCeff(size_t n, size_t r) { size_t num = 1; size_t dem = 1; diff --git a/pennylane_lightning/src/tests/Test_Util.cpp b/pennylane_lightning/src/tests/Test_Util.cpp index 8ac67d087a..adebf08c66 100644 --- a/pennylane_lightning/src/tests/Test_Util.cpp +++ b/pennylane_lightning/src/tests/Test_Util.cpp @@ -608,3 +608,35 @@ TEST_CASE("Utility array and tuples", "[Util]") { std::pair("Four", 4), }); } + +/** + * @brief Test randomUnitary is correct + */ +TEMPLATE_TEST_CASE("randomUnitary", "[Test_Internal]", float, double) { + using PrecisionT = TestType; + + std::mt19937 re{1337}; + + for (size_t num_qubits = 1; num_qubits <= 5; num_qubits++) { + const size_t dim = (1U << num_qubits); + const auto unitary = Util::randomUnitary(re, num_qubits); + + auto unitary_dagger = Util::Transpose(unitary, dim, 
dim); + std::transform( + unitary_dagger.begin(), unitary_dagger.end(), + unitary_dagger.begin(), + [](const std::complex &v) { return std::conj(v); }); + + std::vector> mat(dim * dim); + Util::matrixMatProd(unitary.data(), unitary_dagger.data(), mat.data(), + dim, dim, dim); + + std::vector> identity( + dim * dim, std::complex{}); + for (size_t i = 0; i < dim; i++) { + identity[i * dim + i] = std::complex{1.0, 0.0}; + } + + REQUIRE(mat == PLApprox(identity).margin(1e-5)); + } +} diff --git a/pennylane_lightning/src/util/LinearAlgebra.hpp b/pennylane_lightning/src/util/LinearAlgebra.hpp index bdf0e35f99..9f6941f891 100644 --- a/pennylane_lightning/src/util/LinearAlgebra.hpp +++ b/pennylane_lightning/src/util/LinearAlgebra.hpp @@ -17,9 +17,11 @@ */ #pragma once +#include #include #include #include +#include #include #include "Util.hpp" @@ -746,4 +748,69 @@ inline auto matrixMatProd(const std::vector> m_left, return m_out; } + +/** + * @brief Calculate the squared norm of a vector + */ +template +auto squaredNorm(const std::complex *data, size_t data_size) + -> PrecisionT { + return std::transform_reduce( + data, data + data_size, PrecisionT{}, std::plus(), + static_cast &)>( + &std::norm)); +} + +/** + * @brief Generate random unitary matrix + * + * @return Generated unitary matrix in row-major format + */ +template +auto randomUnitary(RandomEngine &re, size_t num_qubits) + -> std::vector> { + using ComplexPrecisionT = std::complex; + const size_t dim = (1U << num_qubits); + std::vector res(dim * dim, ComplexPrecisionT{}); + + std::normal_distribution dist; + + auto generator = [&dist, &re]() -> ComplexPrecisionT { + return ComplexPrecisionT{dist(re), dist(re)}; + }; + + std::generate(res.begin(), res.end(), generator); + + // Simple algorithm to make rows orthogonal with Gram-Schmidt + // This algorithm is unstable but works for a small matrix. + // Use QR decomposition when we have LAPACK support. 
+ + for (size_t row2 = 0; row2 < dim; row2++) { + ComplexPrecisionT *row2_p = res.data() + row2 * dim; + for (size_t row1 = 0; row1 < row2; row1++) { + const ComplexPrecisionT *row1_p = res.data() + row1 * dim; + ComplexPrecisionT dot12 = Util::innerProdC(row1_p, row2_p, dim); + ComplexPrecisionT dot11 = squaredNorm(row1_p, dim); + + // orthogonalize row2 + std::transform( + row2_p, row2_p + dim, row1_p, row2_p, + [scale = dot12 / dot11](auto &elt2, const auto &elt1) { + return elt2 - scale * elt1; + }); + } + } + + // Normalize each row + for (size_t row = 0; row < dim; row++) { + ComplexPrecisionT *row_p = res.data() + row * dim; + PrecisionT norm2 = std::sqrt(squaredNorm(row_p, dim)); + + // normalize row2 + std::transform(row_p, row_p + dim, row_p, [norm2](const auto c) { + return (static_cast(1.0) / norm2) * c; + }); + } + return res; +} } // namespace Pennylane::Util From a0df568b1df1f40445f74e46740637172d9186db Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Tue, 1 Mar 2022 23:24:52 -0500 Subject: [PATCH 05/94] Update benchmark --- .../src/examples/CMakeLists.txt | 12 +- .../src/examples/run_benchmark.py | 156 ++++++++++++++++++ .../src/examples/run_benchmark.sh | 23 ++- pennylane_lightning/src/examples/test.sh | 2 +- .../DefaultKernelsForStateVector.hpp | 2 +- 5 files changed, 180 insertions(+), 15 deletions(-) create mode 100644 pennylane_lightning/src/examples/run_benchmark.py diff --git a/pennylane_lightning/src/examples/CMakeLists.txt b/pennylane_lightning/src/examples/CMakeLists.txt index addb786ad6..8c7d0a900b 100644 --- a/pennylane_lightning/src/examples/CMakeLists.txt +++ b/pennylane_lightning/src/examples/CMakeLists.txt @@ -9,10 +9,9 @@ project("gate_benchmark" LANGUAGES CXX ) -# add_executable(gate_benchmark gate_benchmark.cpp) -# target_link_libraries(gate_benchmark lightning_utils lightning_simulator -# lightning_compile_options -# lightning_external_libs) +################################################################################ +# 
II. Set dependencies +################################################################################ add_library(lightning_examples INTERFACE) target_link_libraries(lightning_examples INTERFACE lightning_compile_options @@ -21,6 +20,11 @@ target_link_libraries(lightning_examples INTERFACE lightning_compile_options lightning_simulator lightning_utils) + +################################################################################ +# III. set executables +################################################################################ + add_executable(benchmark_gate benchmark_gate.cpp) target_link_libraries(benchmark_gate PRIVATE lightning_examples) diff --git a/pennylane_lightning/src/examples/run_benchmark.py b/pennylane_lightning/src/examples/run_benchmark.py new file mode 100644 index 0000000000..1a7febfa27 --- /dev/null +++ b/pennylane_lightning/src/examples/run_benchmark.py @@ -0,0 +1,156 @@ +import subprocess +import argparse +import json +from pathlib import Path +from typing import final +import abc + +MIN_NUM_QUBITS = 8 +MAX_NUM_QUBITS = 24 +STEP_NUM_QUBITS = 2 +NUM_GATE_REPS = 1000 + + +class BenchmarkRunner: + def __init__(self, kernel, operation): + self.kernel = kernel + self.operation = operation + + @final + def benchmark(self, res_path): + result = [] + ext_info = self.external_info() + if ext_info: + result.append(ext_info) + try: + for num_qubit in range(MIN_NUM_QUBITS, MAX_NUM_QUBITS + 1, STEP_NUM_QUBITS): + cmd = self.command(num_qubit) + print(f"Run N={num_qubit}, {self.kernel}, {self.operation}") + output = subprocess.run([str(c) for c in cmd], capture_output=True, check=True) + time = output.stdout.decode("utf-8").strip().split(",")[1] + result.append({"N": num_qubit, "time": time}) + except subprocess.CalledProcessError as err: + print("Error from subprocess call. 
Message:") + print(err.stderr.decode("utf-8")) + except KeyboardInterrupt: + pass + + res_path = Path(res_path) + if not res_path.exists(): + res_path.mkdir(parents=True) + + with res_path.joinpath(self.filename()).open("w") as f: + json.dump(result, f, indent=4) + + @abc.abstractmethod + def command(self, num_qubits): + pass + + @abc.abstractmethod + def external_info(self): + pass + + @abc.abstractmethod + def filename(self): + pass + + +class MatrixBenchmarkRunner(BenchmarkRunner): + def __init__(self, kernel, operation, num_wires): + super().__init__(kernel, operation) + self.num_wires = num_wires + + def command(self, num_qubits): + return ["./benchmark_matrix", NUM_GATE_REPS, num_qubits, self.kernel, self.num_wires] + + def external_info(self): + return {"num_wires": self.num_wires} + + def filename(self): + return f"Matrix_{self.kernel}_{self.num_wires}.json" + + +class GateBenchmarkRunner(BenchmarkRunner): + def __init__(self, kernel, operation, num_wires=None): + super().__init__(kernel, operation) + self.num_wires = num_wires + + def command(self, num_qubits): + cmd = ["./benchmark_gate", NUM_GATE_REPS, num_qubits, self.kernel, self.operation] + if self.num_wires: + cmd.append(self.num_wires) + return cmd + + def external_info(self): + if self.num_wires: + return {"num_wires": self.num_wires} + else: + return None + + def filename(self): + if self.num_wires: + return f"{self.operation}_{self.kernel}_{self.num_wires}.json" + else: + return f"{self.operation}_{self.kernel}.json" + + +class GeneratorBenchmarkRunner(BenchmarkRunner): + def __init__(self, kernel, operation, num_wires=None): + super().__init__(kernel, operation) + self.num_wires = num_wires + + def command(self, num_qubits): + cmd = ["./benchmark_generator", NUM_GATE_REPS, num_qubits, self.kernel, self.operation[9:]] + if self.num_wires is not None: + cmd.append(self.num_wires) + return cmd + + def external_info(self): + if self.num_wires: + return {"num_wires": self.num_wires} + else: + return 
None + + def filename(self): + if self.num_wires: + return f"{self.operation}_{self.kernel}_{self.num_wires}.json" + else: + return f"{self.operation}_{self.kernel}.json" + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Run C++ benchmarks") + parser.add_argument("kernel", help="Kernel to benchmark") + parser.add_argument("operation", help="Operation to benchmark.") + + parser.add_argument( + "num_wires", + help="Number of wires (optional for multi-qubit operations).", + nargs="?", + default=None, + type=int, + ) + + args = parser.parse_args() + + compiler_info_file = "compiler_info.txt" + + try: + with open(compiler_info_file, "r") as f: + res_path = "res_" + f.readline().strip() + except OSError: + print("Encountered an error while opening '{}'".format(compiler_info_file)) + sys.exit(1) + + if args.operation == "Matrix": + if args.num_wires == 0: + raise ValueError( + "Parameter num_wires must be provided and larger than 0 for matrix benchmark." + ) + runner = MatrixBenchmarkRunner(args.kernel, args.operation, args.num_wires) + elif args.operation.startswith("Generator"): + runner = GeneratorBenchmarkRunner(args.kernel, args.operation, args.num_wires) + else: + runner = GateBenchmarkRunner(args.kernel, args.operation, args.num_wires) + + runner.benchmark(res_path) diff --git a/pennylane_lightning/src/examples/run_benchmark.sh b/pennylane_lightning/src/examples/run_benchmark.sh index e8f7daf657..1818100452 100755 --- a/pennylane_lightning/src/examples/run_benchmark.sh +++ b/pennylane_lightning/src/examples/run_benchmark.sh @@ -13,24 +13,29 @@ max_num_qubits=24 num_qubits_increment=2 num_gate_reps=1000 kernel="$1" -gate="$2" -path_to_binary="./benchmark_gate" + +if [[ "$2" == "Matrix" ]]; then + path_to_binary="./benchmark_matrix" + command_format="$path_to_binary ${num_gate_reps} $kernel ${@:3}" +elif [[ "$2" =~ "Generator.*" ]]; then + path_to_binary="./benchmark_generator" + operation=$(echo "$2" | cut -c10-) + 
command_format="$path_to_binary ${num_gate_reps} %d $kernel $operation ${@:3}" +else + path_to_binary="./benchmark_gate" + operation="$2" + command_format="$path_to_binary ${num_gate_reps} %d $kernel $operation ${@:3}" +fi + compiler_info=$( $path_to_csv -if [[ "$#" -eq 3 ]]; then - command_format="$path_to_binary ${num_gate_reps} %d ${kernel} ${gate} $3" -else - command_format="$path_to_binary ${num_gate_reps} %d ${kernel} ${gate}" -fi - # Generate data for ((num_qubits=$min_num_qubits; num_qubits<$max_num_qubits+1; num_qubits+=$num_qubits_increment)); do echo "Gate repetition=$num_gate_reps, num_qubits=$num_qubits, kernel=$kernel, gate=$gate" diff --git a/pennylane_lightning/src/examples/test.sh b/pennylane_lightning/src/examples/test.sh index bb03f7a040..fec430ac67 100644 --- a/pennylane_lightning/src/examples/test.sh +++ b/pennylane_lightning/src/examples/test.sh @@ -1 +1 @@ -echo $# +echo ${[]} diff --git a/pennylane_lightning/src/simulator/DefaultKernelsForStateVector.hpp b/pennylane_lightning/src/simulator/DefaultKernelsForStateVector.hpp index 5cb25cbd93..f3a33b91bf 100644 --- a/pennylane_lightning/src/simulator/DefaultKernelsForStateVector.hpp +++ b/pennylane_lightning/src/simulator/DefaultKernelsForStateVector.hpp @@ -312,7 +312,7 @@ class DefaultKernelsForStateVector { all_qubit_numbers, KernelType::LM); instance.assignKernelForMatrix(MatrixOperation::MultiQubitOp, all_threading, all_memory_model, - all_qubit_numbers, KernelType::LM); + all_qubit_numbers, KernelType::PI); } DefaultKernelsForStateVector() { From d0265b3f2a8241fdf93fff5083c29461ed16d67c Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Tue, 1 Mar 2022 23:25:19 -0500 Subject: [PATCH 06/94] Update CMake --- pennylane_lightning/src/examples/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pennylane_lightning/src/examples/CMakeLists.txt b/pennylane_lightning/src/examples/CMakeLists.txt index 8c7d0a900b..6754f60fee 100644 --- 
a/pennylane_lightning/src/examples/CMakeLists.txt +++ b/pennylane_lightning/src/examples/CMakeLists.txt @@ -42,8 +42,8 @@ configure_file("compiler_info.in" "compiler_info.txt") add_custom_command(TARGET benchmark_gate POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy - ${PROJECT_SOURCE_DIR}/run_benchmark.sh - ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/run_benchmark.sh + ${PROJECT_SOURCE_DIR}/run_benchmark.py + ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/run_benchmark.py COMMAND ${CMAKE_COMMAND} -E create_symlink ${PROJECT_SOURCE_DIR}/plot_benchmark.py ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/plot_benchmark.py From d4b92d13da5db5e893a790dae1f7d8ce34a017cd Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Tue, 1 Mar 2022 23:39:08 -0500 Subject: [PATCH 07/94] Fix for tidy --- pennylane_lightning/src/examples/benchmark_gate.cpp | 3 ++- pennylane_lightning/src/examples/benchmark_generator.cpp | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pennylane_lightning/src/examples/benchmark_gate.cpp b/pennylane_lightning/src/examples/benchmark_gate.cpp index 00545b1988..7dff1f507c 100644 --- a/pennylane_lightning/src/examples/benchmark_gate.cpp +++ b/pennylane_lightning/src/examples/benchmark_gate.cpp @@ -175,7 +175,7 @@ int main(int argc, char *argv[]) { size_t num_wires_for_multi_qubit = 0; if (Util::array_has_elt(Constant::multi_qubit_gates, gate_op)) { // User provided a multi-qubit gates - if (argc != 6) { + if (argc != 6) { // NOLINT(readability-magic-numbers) std::cerr << "One should provide the number of wires when using " "multi qubit gates." 
<< std::endl; @@ -183,6 +183,7 @@ int main(int argc, char *argv[]) { } try { + // NOLINTNEXTLINE(readability-magic-numbers) num_wires_for_multi_qubit = std::stoi(argv[5]); } catch (std::exception &e) { std::cerr << "Number of wires must be an integer" << std::endl; diff --git a/pennylane_lightning/src/examples/benchmark_generator.cpp b/pennylane_lightning/src/examples/benchmark_generator.cpp index 0753b57e6a..5e132d7e25 100644 --- a/pennylane_lightning/src/examples/benchmark_generator.cpp +++ b/pennylane_lightning/src/examples/benchmark_generator.cpp @@ -174,7 +174,7 @@ int main(int argc, char *argv[]) { size_t num_wires_for_multi_qubit = 0; if (Util::array_has_elt(Constant::multi_qubit_generators, gntr_op)) { // User provided a multi-qubit gates - if (argc != 6) { + if (argc != 6) { // NOLINT(readability-magic-numbers) std::cerr << "One should provide the number of wires when using " "multi qubit generators." << std::endl; @@ -182,6 +182,7 @@ int main(int argc, char *argv[]) { } try { + // NOLINTNEXTLINE(readability-magic-numbers) num_wires_for_multi_qubit = std::stoi(argv[5]); } catch (std::exception &e) { std::cerr << "Number of wires must be an integer" << std::endl; From aa8f3122249a3914d3fe5793da6e650be964ca63 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Tue, 1 Mar 2022 23:40:55 -0500 Subject: [PATCH 08/94] Fix for codefac --- .github/workflows/dev_version_script.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/dev_version_script.py b/.github/workflows/dev_version_script.py index 2b3f526f8b..f3a2a7300b 100644 --- a/.github/workflows/dev_version_script.py +++ b/.github/workflows/dev_version_script.py @@ -34,10 +34,7 @@ def extract_version(package_path): def is_dev(version_str): m = rgx_dev_ver.fullmatch(version_str) - if m: - return True - else: - return False + return m is not None: def update_dev_version(package_path, version_str): m = rgx_dev_ver.fullmatch(version_str) From 
717c0683c92208fc1a760a3d320e898998c5e407 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Tue, 1 Mar 2022 23:53:20 -0500 Subject: [PATCH 09/94] Update for Codefactor --- pennylane_lightning/src/bindings/Bindings.hpp | 1 - .../src/examples/benchmark_generator.cpp | 1 - .../src/examples/run_benchmark.py | 12 ++--- .../src/examples/run_benchmark.sh | 44 ------------------- pennylane_lightning/src/examples/test.sh | 1 - .../cpu_kernels/GateImplementationsLM.hpp | 2 - .../src/simulator/StateVectorManagedCPU.hpp | 2 - .../src/tests/CreateAllWires.cpp | 1 - .../src/tests/Test_Internal.cpp | 1 - 9 files changed, 4 insertions(+), 61 deletions(-) delete mode 100755 pennylane_lightning/src/examples/run_benchmark.sh delete mode 100644 pennylane_lightning/src/examples/test.sh diff --git a/pennylane_lightning/src/bindings/Bindings.hpp b/pennylane_lightning/src/bindings/Bindings.hpp index 84ef5f806c..089aea48d7 100644 --- a/pennylane_lightning/src/bindings/Bindings.hpp +++ b/pennylane_lightning/src/bindings/Bindings.hpp @@ -107,7 +107,6 @@ void deallocateArray(void *ptr) { std::free(ptr); } * for capsule usage. 
*/ auto allocateAlignedArray(size_t size, pybind11::dtype dt) -> pybind11::array { - auto memory_model = bestCPUMemoryModel(); if (dt.is(pybind11::dtype::of())) { diff --git a/pennylane_lightning/src/examples/benchmark_generator.cpp b/pennylane_lightning/src/examples/benchmark_generator.cpp index 5e132d7e25..c1ea726ec3 100644 --- a/pennylane_lightning/src/examples/benchmark_generator.cpp +++ b/pennylane_lightning/src/examples/benchmark_generator.cpp @@ -68,7 +68,6 @@ auto generateGeneratorSequence(RandomEngine &re, std::uniform_int_distribution inverse_dist(0, 1); for (uint32_t k = 0; k < num_reps; k++) { - bool inverse = static_cast(inverse_dist(re)); auto wires = generateNeighboringWires(re, num_qubits, num_wires); diff --git a/pennylane_lightning/src/examples/run_benchmark.py b/pennylane_lightning/src/examples/run_benchmark.py index 1a7febfa27..f2a770d2c8 100644 --- a/pennylane_lightning/src/examples/run_benchmark.py +++ b/pennylane_lightning/src/examples/run_benchmark.py @@ -84,14 +84,12 @@ def command(self, num_qubits): def external_info(self): if self.num_wires: return {"num_wires": self.num_wires} - else: - return None + return None def filename(self): if self.num_wires: return f"{self.operation}_{self.kernel}_{self.num_wires}.json" - else: - return f"{self.operation}_{self.kernel}.json" + return f"{self.operation}_{self.kernel}.json" class GeneratorBenchmarkRunner(BenchmarkRunner): @@ -108,14 +106,12 @@ def command(self, num_qubits): def external_info(self): if self.num_wires: return {"num_wires": self.num_wires} - else: - return None + return None def filename(self): if self.num_wires: return f"{self.operation}_{self.kernel}_{self.num_wires}.json" - else: - return f"{self.operation}_{self.kernel}.json" + return f"{self.operation}_{self.kernel}.json" if __name__ == "__main__": diff --git a/pennylane_lightning/src/examples/run_benchmark.sh b/pennylane_lightning/src/examples/run_benchmark.sh deleted file mode 100755 index 1818100452..0000000000 --- 
a/pennylane_lightning/src/examples/run_benchmark.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash - -currdir=$(pwd) - -if [ "$#" -lt 2 ]; then - echo "Usage: $0 Kernel Gate [Number of wires (for MultiRZ)]" - exit 1 -fi - -# Parameter initialization -min_num_qubits=8 -max_num_qubits=24 -num_qubits_increment=2 -num_gate_reps=1000 -kernel="$1" - -if [[ "$2" == "Matrix" ]]; then - path_to_binary="./benchmark_matrix" - command_format="$path_to_binary ${num_gate_reps} $kernel ${@:3}" -elif [[ "$2" =~ "Generator.*" ]]; then - path_to_binary="./benchmark_generator" - operation=$(echo "$2" | cut -c10-) - command_format="$path_to_binary ${num_gate_reps} %d $kernel $operation ${@:3}" -else - path_to_binary="./benchmark_gate" - operation="$2" - command_format="$path_to_binary ${num_gate_reps} %d $kernel $operation ${@:3}" -fi - - -compiler_info=$( $path_to_csv - -# Generate data -for ((num_qubits=$min_num_qubits; num_qubits<$max_num_qubits+1; num_qubits+=$num_qubits_increment)); do - echo "Gate repetition=$num_gate_reps, num_qubits=$num_qubits, kernel=$kernel, gate=$gate" - command=$(printf "$command_format" "$num_qubits") - $command >> $path_to_csv -done diff --git a/pennylane_lightning/src/examples/test.sh b/pennylane_lightning/src/examples/test.sh deleted file mode 100644 index fec430ac67..0000000000 --- a/pennylane_lightning/src/examples/test.sh +++ /dev/null @@ -1 +0,0 @@ -echo ${[]} diff --git a/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsLM.hpp b/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsLM.hpp index 2a0e75c529..651731f8d2 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsLM.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsLM.hpp @@ -260,7 +260,6 @@ class GateImplementationsLM : public PauliGenerator { if (inverse) { for (size_t k = 0; k < Util::exp2(num_qubits); k += dim) { - for (size_t inner_idx = 0; inner_idx < dim; inner_idx++) { size_t idx = k | inner_idx; size_t n_wires = 
wires.size(); @@ -285,7 +284,6 @@ class GateImplementationsLM : public PauliGenerator { } } else { for (size_t k = 0; k < Util::exp2(num_qubits); k += dim) { - for (size_t inner_idx = 0; inner_idx < dim; inner_idx++) { size_t idx = k | inner_idx; size_t n_wires = wires.size(); diff --git a/pennylane_lightning/src/simulator/StateVectorManagedCPU.hpp b/pennylane_lightning/src/simulator/StateVectorManagedCPU.hpp index 599cb9b91b..dd5d817db7 100644 --- a/pennylane_lightning/src/simulator/StateVectorManagedCPU.hpp +++ b/pennylane_lightning/src/simulator/StateVectorManagedCPU.hpp @@ -48,7 +48,6 @@ class StateVectorManagedCPU size_t num_qubits, Threading threading = bestThreading(), CPUMemoryModel memory_model = bestCPUMemoryModel()) : BaseType{num_qubits, threading, memory_model} { - size_t length = BaseType::getLength(); data_ = allocateMemory(memory_model, length); std::fill(data_.get(), data_.get() + length, @@ -61,7 +60,6 @@ class StateVectorManagedCPU const StateVectorCPU &other) : BaseType(other.getNumQubits(), other.threading(), other.memoryModel()) { - size_t length = BaseType::getLength(); data_ = allocateMemory(other.memoryModel(), length); diff --git a/pennylane_lightning/src/tests/CreateAllWires.cpp b/pennylane_lightning/src/tests/CreateAllWires.cpp index 4738554b54..ecea28089c 100644 --- a/pennylane_lightning/src/tests/CreateAllWires.cpp +++ b/pennylane_lightning/src/tests/CreateAllWires.cpp @@ -2,7 +2,6 @@ namespace Pennylane { auto crateAllWires(size_t n_qubits, Gates::GateOperation gate_op, bool order) -> std::vector> { - if (Util::array_has_elt(Gates::Constant::multi_qubit_gates, gate_op)) { // make all possible 2^N permutations std::vector> res; diff --git a/pennylane_lightning/src/tests/Test_Internal.cpp b/pennylane_lightning/src/tests/Test_Internal.cpp index b595d5daeb..9ef68bdc13 100644 --- a/pennylane_lightning/src/tests/Test_Internal.cpp +++ b/pennylane_lightning/src/tests/Test_Internal.cpp @@ -109,7 +109,6 @@ size_t permSize(size_t n, size_t r) { 
* @brief Test create all wires */ TEST_CASE("createAllWires", "[Test_Internal]") { - SECTION("order = false") { const std::vector> test_pairs{ {4, 2}, {8, 3}, {12, 1}, {12, 2}, {12, 3}, {12, 4}, {12, 5}, From fc5dce9440f4b34f55ab76a988967a8bd17a3eec Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Tue, 1 Mar 2022 23:54:06 -0500 Subject: [PATCH 10/94] Fix version script --- .github/workflows/dev_version_script.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/dev_version_script.py b/.github/workflows/dev_version_script.py index f3a2a7300b..82d47e6656 100644 --- a/.github/workflows/dev_version_script.py +++ b/.github/workflows/dev_version_script.py @@ -34,7 +34,7 @@ def extract_version(package_path): def is_dev(version_str): m = rgx_dev_ver.fullmatch(version_str) - return m is not None: + return m is not None def update_dev_version(package_path, version_str): m = rgx_dev_ver.fullmatch(version_str) From c219a5c076da556d6769ea6af315063fc2970c4b Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Tue, 1 Mar 2022 23:55:21 -0500 Subject: [PATCH 11/94] More fix for codecov --- pennylane_lightning/src/examples/benchmark_matrix.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/pennylane_lightning/src/examples/benchmark_matrix.cpp b/pennylane_lightning/src/examples/benchmark_matrix.cpp index 9d297db91c..26d6ec45df 100644 --- a/pennylane_lightning/src/examples/benchmark_matrix.cpp +++ b/pennylane_lightning/src/examples/benchmark_matrix.cpp @@ -38,7 +38,6 @@ template auto generateMatrixSequence(RandomEngine &re, const size_t num_reps, const size_t num_qubits, const size_t num_wires) -> std::vector { - std::vector matrix_seq; matrix_seq.reserve(num_reps); std::uniform_int_distribution inverse_dist(0, 1); From c882648dcd10ed27ec05e635ffcea4f5ce2966a1 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Wed, 2 Mar 2022 00:41:09 -0500 Subject: [PATCH 12/94] tidy test --- .github/workflows/format.yml | 2 +- 
pennylane_lightning/src/CMakeLists.txt | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml index 347f111cae..319471b3ca 100644 --- a/.github/workflows/format.yml +++ b/.github/workflows/format.yml @@ -42,7 +42,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v2 with: - python-version: 3.7 + python-version: 3.8 - name: Install dependencies run: sudo apt update && sudo apt -y install clang-tidy-12 cmake g++ diff --git a/pennylane_lightning/src/CMakeLists.txt b/pennylane_lightning/src/CMakeLists.txt index b6776ac992..0385f947d6 100644 --- a/pennylane_lightning/src/CMakeLists.txt +++ b/pennylane_lightning/src/CMakeLists.txt @@ -11,8 +11,9 @@ if(ENABLE_CLANG_TIDY) if(NOT DEFINED CLANG_TIDY_BINARY) set(CLANG_TIDY_BINARY clang-tidy) endif() + message(STATUS "Using CLANG_TIDY_BINARY=${CLANG_TIDY_BINARY}") set(CMAKE_CXX_CLANG_TIDY ${CLANG_TIDY_BINARY}; - -extra-arg=-std=c++17; + -extra-arg=-std=c++17; ) endif() From 6a8c17953caf648f25e72741307ad546be14fba9 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Wed, 2 Mar 2022 10:01:53 -0500 Subject: [PATCH 13/94] Fix for clang --- .../src/simulator/DynamicDispatcher.hpp | 2 +- pennylane_lightning/src/tests/TestHelpers.hpp | 2 +- .../Test_GateImplementations_Generator.cpp | 2 -- .../src/tests/Test_Internal.cpp | 4 +-- .../src/tests/Test_OpToMemberFuncPtr.cpp | 10 ------ pennylane_lightning/src/util/Memory.hpp | 33 +++++++++++++++++-- 6 files changed, 34 insertions(+), 19 deletions(-) diff --git a/pennylane_lightning/src/simulator/DynamicDispatcher.hpp b/pennylane_lightning/src/simulator/DynamicDispatcher.hpp index e761cdeca4..f25bddf95a 100644 --- a/pennylane_lightning/src/simulator/DynamicDispatcher.hpp +++ b/pennylane_lightning/src/simulator/DynamicDispatcher.hpp @@ -323,7 +323,7 @@ template class DynamicDispatcher { "The size of matrix does not match with the given " "number of wires"); } - applyMatrix(kernel, data, num_qubits, 
matrix.data(), wires, inverse); + applyMatrix(kernel, data, mat_op, num_qubits, matrix.data(), wires, inverse); } /** diff --git a/pennylane_lightning/src/tests/TestHelpers.hpp b/pennylane_lightning/src/tests/TestHelpers.hpp index dc6056016b..457d1eebd2 100644 --- a/pennylane_lightning/src/tests/TestHelpers.hpp +++ b/pennylane_lightning/src/tests/TestHelpers.hpp @@ -127,7 +127,7 @@ isApproxEqual(const std::vector &data1, const typename Data_t::value_type eps = std::numeric_limits::epsilon() * 100) { - return data1 == PLApprox(data2); + return data1 == PLApprox(data2).epsilon(eps); } /** diff --git a/pennylane_lightning/src/tests/Test_GateImplementations_Generator.cpp b/pennylane_lightning/src/tests/Test_GateImplementations_Generator.cpp index 2e9cd9cdcb..4b80e5235b 100644 --- a/pennylane_lightning/src/tests/Test_GateImplementations_Generator.cpp +++ b/pennylane_lightning/src/tests/Test_GateImplementations_Generator.cpp @@ -35,8 +35,6 @@ constexpr std::string_view remove_prefix(const std::string_view &str, return {str.data() + len, str.length() - len}; } -constexpr auto gate_name_to_ops = Util::reverse_pairs(Constant::gate_names); - template constexpr auto findGateOpForGenerator() -> GateOperation { constexpr auto gntr_name = diff --git a/pennylane_lightning/src/tests/Test_Internal.cpp b/pennylane_lightning/src/tests/Test_Internal.cpp index 9ef68bdc13..99233b89d6 100644 --- a/pennylane_lightning/src/tests/Test_Internal.cpp +++ b/pennylane_lightning/src/tests/Test_Internal.cpp @@ -114,7 +114,7 @@ TEST_CASE("createAllWires", "[Test_Internal]") { {4, 2}, {8, 3}, {12, 1}, {12, 2}, {12, 3}, {12, 4}, {12, 5}, {12, 6}, {12, 7}, {12, 8}, {12, 9}, {12, 10}, {12, 11}, {12, 12}}; - for (const auto [n, r] : test_pairs) { + for (const auto& [n, r] : test_pairs) { std::vector> vec; auto v = CombinationGenerator(n, r).all_perms(); @@ -139,7 +139,7 @@ TEST_CASE("createAllWires", "[Test_Internal]") { const std::vector> test_pairs{ {4, 2}, {8, 3}, {12, 1}, {12, 2}, {12, 3}, {12, 4}, 
{12, 5}}; - for (const auto [n, r] : test_pairs) { + for (const auto& [n, r] : test_pairs) { auto v = PermutationGenerator(n, r).all_perms(); REQUIRE(v.size() == permSize(n, r)); diff --git a/pennylane_lightning/src/tests/Test_OpToMemberFuncPtr.cpp b/pennylane_lightning/src/tests/Test_OpToMemberFuncPtr.cpp index 6f3f5bdd4f..99f5494128 100644 --- a/pennylane_lightning/src/tests/Test_OpToMemberFuncPtr.cpp +++ b/pennylane_lightning/src/tests/Test_OpToMemberFuncPtr.cpp @@ -95,16 +95,6 @@ class DummyImplementation { allGateOps(); constexpr static std::string_view name = "Dummy"; - template - static void applyMatrix(std::complex *arr, size_t num_qubits, - const std::complex *matrix, - const std::vector &wires, bool inverse) { - static_cast(arr); - static_cast(num_qubits); - static_cast(matrix); - static_cast(inverse); - } - PENNYLANE_TESTS_DEFINE_GATE_OP(PauliX, 0) PENNYLANE_TESTS_DEFINE_GATE_OP(PauliY, 0) PENNYLANE_TESTS_DEFINE_GATE_OP(PauliZ, 0) diff --git a/pennylane_lightning/src/util/Memory.hpp b/pennylane_lightning/src/util/Memory.hpp index 223d977c0a..0f5e3aa9f7 100644 --- a/pennylane_lightning/src/util/Memory.hpp +++ b/pennylane_lightning/src/util/Memory.hpp @@ -19,7 +19,35 @@ #include "ConstantUtil.hpp" #include "TypeList.hpp" +/* Apple clang does not support std::aligned_alloc in Mac 10.14 */ + namespace Pennylane { +/** + * @brief Custom aligned allocate function. 
As appleclang does not support + * std::aligned_alloc in Mac OS 10.14, we use posix memalign + */ +inline auto alignedAlloc(uint32_t alignment, size_t bytes) -> void* { +#if defined(__clang__) // probably AppleClang + void* p; + posix_memalign(&p, alignment, bytes); + return p; +#elif defined(_MSC_VER) + return _aligned_malloc(bytes, alignment); +#else + return std::aligned_alloc(alignment, bytes); +#endif +} + +inline void alignedFree(void* p) { +#if defined(__clang__) + return free(p); +#elif defined(_MSC_VER) + return _aligned_free(p); +#else + return std::free(p); +#endif +} + template struct AlignedAllocator { static_assert(Util::constIsPerfectPowerOf2(alignment), "Template parameter alignment must be power of 2."); @@ -39,7 +67,7 @@ template struct AlignedAllocator { if (size == 0) { return nullptr; } - void *p = std::aligned_alloc(alignment, sizeof(T) * size); + void *p = alignedAlloc(alignment, sizeof(T) * size); if (p == nullptr) { throw std::bad_alloc(); } @@ -47,8 +75,7 @@ template struct AlignedAllocator { } void deallocate(T *p, [[maybe_unused]] std::size_t size) noexcept { - // NOLINTNEXTLINE(hicpp-no-malloc) - std::free(p); + alignedFree(p); } template void construct(U *ptr) { ::new ((void *)ptr) U(); } From 338cf534711b863e4b4bbaf9c727ab543c5b138b Mon Sep 17 00:00:00 2001 From: Dev version update bot Date: Wed, 2 Mar 2022 15:03:34 +0000 Subject: [PATCH 14/94] Auto update version --- pennylane_lightning/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pennylane_lightning/_version.py b/pennylane_lightning/_version.py index 50254fda3b..b362bf426f 100644 --- a/pennylane_lightning/_version.py +++ b/pennylane_lightning/_version.py @@ -16,4 +16,4 @@ Version number (major.minor.patch[-label]) """ -__version__ = "0.22.0-dev12" +__version__ = "0.22.0-dev13" From 13ed29ebf8e16911effe936dc7dff52e3f382232 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Wed, 2 Mar 2022 10:12:53 -0500 Subject: [PATCH 15/94] More fix for appleclang 
--- pennylane_lightning/src/bindings/Bindings.hpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pennylane_lightning/src/bindings/Bindings.hpp b/pennylane_lightning/src/bindings/Bindings.hpp index 089aea48d7..0c99bd723d 100644 --- a/pennylane_lightning/src/bindings/Bindings.hpp +++ b/pennylane_lightning/src/bindings/Bindings.hpp @@ -97,7 +97,7 @@ auto getNumpyArrayAlignment(const pybind11::array &numpyArray) return getMemoryModel(numpyArray.request().ptr); } -void deallocateArray(void *ptr) { std::free(ptr); } +void deallocateArray(void *ptr) { alignedFree(ptr); } /** * @brief We return an numpy array whose underlying data is allocated by @@ -110,20 +110,20 @@ auto allocateAlignedArray(size_t size, pybind11::dtype dt) -> pybind11::array { auto memory_model = bestCPUMemoryModel(); if (dt.is(pybind11::dtype::of())) { - void *ptr = std::aligned_alloc(getAlignment(memory_model), + void *ptr = alignedAlloc(getAlignment(memory_model), sizeof(float) * size); auto capsule = pybind11::capsule(ptr, &deallocateArray); return pybind11::array{dt, {size}, {sizeof(float)}, ptr, capsule}; } else if (dt.is(pybind11::dtype::of())) { - void *ptr = std::aligned_alloc(getAlignment(memory_model), + void *ptr = alignedAlloc(getAlignment(memory_model), sizeof(double) * size); auto capsule = pybind11::capsule(ptr, &deallocateArray); return pybind11::array{dt, {size}, {sizeof(double)}, ptr, capsule}; } else if (dt.is(pybind11::dtype::of>())) { void *ptr = - std::aligned_alloc(getAlignment>(memory_model), + alignedAlloc(getAlignment>(memory_model), sizeof(std::complex) * size); auto capsule = pybind11::capsule(ptr, &deallocateArray); @@ -131,7 +131,7 @@ auto allocateAlignedArray(size_t size, pybind11::dtype dt) -> pybind11::array { dt, {size}, {sizeof(std::complex)}, ptr, capsule}; } else if (dt.is(pybind11::dtype::of>())) { void *ptr = - std::aligned_alloc(getAlignment>(memory_model), + alignedAlloc(getAlignment>(memory_model), sizeof(std::complex) * size); 
auto capsule = pybind11::capsule(ptr, &deallocateArray); From 09b471e760f21feb75a7017087d5932e0b85322a Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Wed, 2 Mar 2022 13:32:11 -0500 Subject: [PATCH 16/94] Fix aligned memory --- .../src/algorithms/JacobianTape.hpp | 6 +- pennylane_lightning/src/bindings/Bindings.hpp | 8 +- .../src/simulator/CPUMemoryModel.hpp | 29 ++---- .../src/simulator/DynamicDispatcher.hpp | 3 +- .../src/simulator/StateVectorManagedCPU.hpp | 45 +++------ pennylane_lightning/src/tests/TestHelpers.hpp | 19 ++-- .../tests/Test_GateImplementations_Param.cpp | 93 ++++++++++--------- .../src/tests/Test_Internal.cpp | 4 +- pennylane_lightning/src/util/Memory.hpp | 59 ++++++------ 9 files changed, 126 insertions(+), 140 deletions(-) diff --git a/pennylane_lightning/src/algorithms/JacobianTape.hpp b/pennylane_lightning/src/algorithms/JacobianTape.hpp index ca7d0ac6f7..8a33e89f02 100644 --- a/pennylane_lightning/src/algorithms/JacobianTape.hpp +++ b/pennylane_lightning/src/algorithms/JacobianTape.hpp @@ -86,9 +86,9 @@ template class ObsDatum { } private: - const std::vector obs_name_; - const std::vector obs_params_; - const std::vector> obs_wires_; + const std::vector obs_name_{}; + const std::vector obs_params_{}; + const std::vector> obs_wires_{}; }; /** diff --git a/pennylane_lightning/src/bindings/Bindings.hpp b/pennylane_lightning/src/bindings/Bindings.hpp index 0c99bd723d..9e2fe54114 100644 --- a/pennylane_lightning/src/bindings/Bindings.hpp +++ b/pennylane_lightning/src/bindings/Bindings.hpp @@ -111,20 +111,20 @@ auto allocateAlignedArray(size_t size, pybind11::dtype dt) -> pybind11::array { if (dt.is(pybind11::dtype::of())) { void *ptr = alignedAlloc(getAlignment(memory_model), - sizeof(float) * size); + sizeof(float) * size); auto capsule = pybind11::capsule(ptr, &deallocateArray); return pybind11::array{dt, {size}, {sizeof(float)}, ptr, capsule}; } else if (dt.is(pybind11::dtype::of())) { void *ptr = alignedAlloc(getAlignment(memory_model), 
- sizeof(double) * size); + sizeof(double) * size); auto capsule = pybind11::capsule(ptr, &deallocateArray); return pybind11::array{dt, {size}, {sizeof(double)}, ptr, capsule}; } else if (dt.is(pybind11::dtype::of>())) { void *ptr = alignedAlloc(getAlignment>(memory_model), - sizeof(std::complex) * size); + sizeof(std::complex) * size); auto capsule = pybind11::capsule(ptr, &deallocateArray); return pybind11::array{ @@ -132,7 +132,7 @@ auto allocateAlignedArray(size_t size, pybind11::dtype dt) -> pybind11::array { } else if (dt.is(pybind11::dtype::of>())) { void *ptr = alignedAlloc(getAlignment>(memory_model), - sizeof(std::complex) * size); + sizeof(std::complex) * size); auto capsule = pybind11::capsule(ptr, &deallocateArray); return pybind11::array{ diff --git a/pennylane_lightning/src/simulator/CPUMemoryModel.hpp b/pennylane_lightning/src/simulator/CPUMemoryModel.hpp index 97b60cf7f4..b6228401a0 100644 --- a/pennylane_lightning/src/simulator/CPUMemoryModel.hpp +++ b/pennylane_lightning/src/simulator/CPUMemoryModel.hpp @@ -18,6 +18,7 @@ */ #pragma once #include "Macros.hpp" +#include "Memory.hpp" #include #include @@ -52,11 +53,11 @@ constexpr inline auto bestCPUMemoryModel() -> CPUMemoryModel { return CPUMemoryModel::Unaligned; } -template -constexpr inline auto getAlignment(CPUMemoryModel memory_model) -> size_t { +template +constexpr inline auto getAlignment(CPUMemoryModel memory_model) -> uint32_t { switch (memory_model) { case CPUMemoryModel::Unaligned: - return alignof(PrecisionT); + return alignof(T); case CPUMemoryModel::Aligned256: return 32U; case CPUMemoryModel::Aligned512: @@ -67,23 +68,9 @@ constexpr inline auto getAlignment(CPUMemoryModel memory_model) -> size_t { PL_UNREACHABLE; } -template -auto allocateMemory(CPUMemoryModel memory_model, size_t size) - // NOLINTNEXTLINE(modernize-avoid-c-arrays,hicpp-avoid-c-arrays) - -> std::unique_ptr { - switch (memory_model) { - case CPUMemoryModel::Unaligned: - // 
NOLINTNEXTLINE(modernize-avoid-c-arrays,hicpp-avoid-c-arrays) - return std::unique_ptr{new T[size]}; - case CPUMemoryModel::Aligned256: - // NOLINTNEXTLINE(modernize-avoid-c-arrays,hicpp-avoid-c-arrays) - return std::unique_ptr{new (std::align_val_t(32)) T[size]}; - case CPUMemoryModel::Aligned512: - // NOLINTNEXTLINE(modernize-avoid-c-arrays,hicpp-avoid-c-arrays) - return std::unique_ptr{new (std::align_val_t(64)) T[size]}; - default: - break; - } - PL_UNREACHABLE; +template +constexpr auto getAllocator(CPUMemoryModel memory_model) + -> AlignedAllocator { + return AlignedAllocator{getAlignment(memory_model)}; } } // namespace Pennylane diff --git a/pennylane_lightning/src/simulator/DynamicDispatcher.hpp b/pennylane_lightning/src/simulator/DynamicDispatcher.hpp index f25bddf95a..71c92bf772 100644 --- a/pennylane_lightning/src/simulator/DynamicDispatcher.hpp +++ b/pennylane_lightning/src/simulator/DynamicDispatcher.hpp @@ -323,7 +323,8 @@ template class DynamicDispatcher { "The size of matrix does not match with the given " "number of wires"); } - applyMatrix(kernel, data, mat_op, num_qubits, matrix.data(), wires, inverse); + applyMatrix(kernel, data, mat_op, num_qubits, matrix.data(), wires, + inverse); } /** diff --git a/pennylane_lightning/src/simulator/StateVectorManagedCPU.hpp b/pennylane_lightning/src/simulator/StateVectorManagedCPU.hpp index dd5d817db7..ad21a48134 100644 --- a/pennylane_lightning/src/simulator/StateVectorManagedCPU.hpp +++ b/pennylane_lightning/src/simulator/StateVectorManagedCPU.hpp @@ -26,9 +26,6 @@ namespace Pennylane { * @brief StateVector class where data resides in CPU memory. Memory ownership * resides within class. * - * We currently use std::unique_ptr to C-style array as we want to choose - * allocator in runtime. This is impossible with std::vector. 
- * * @tparam PrecisionT */ template @@ -41,17 +38,15 @@ class StateVectorManagedCPU using BaseType = StateVectorCPU; // NOLINTNEXTLINE(modernize-avoid-c-arrays,hicpp-avoid-c-arrays) - std::unique_ptr data_; + std::vector> data_; public: explicit StateVectorManagedCPU( size_t num_qubits, Threading threading = bestThreading(), CPUMemoryModel memory_model = bestCPUMemoryModel()) - : BaseType{num_qubits, threading, memory_model} { - size_t length = BaseType::getLength(); - data_ = allocateMemory(memory_model, length); - std::fill(data_.get(), data_.get() + length, - ComplexPrecisionT{0.0, 0.0}); + : BaseType{num_qubits, threading, memory_model}, + data_{Util::exp2(num_qubits), ComplexPrecisionT{0.0, 0.0}, + getAllocator(this->memory_model_)} { data_[0] = {1, 0}; } @@ -59,24 +54,19 @@ class StateVectorManagedCPU explicit StateVectorManagedCPU( const StateVectorCPU &other) : BaseType(other.getNumQubits(), other.threading(), - other.memoryModel()) { - size_t length = BaseType::getLength(); - data_ = allocateMemory(other.memoryModel(), length); - - std::copy(other.getData(), other.getData() + length, data_.get()); - } + other.memoryModel()), + data_{other.getData(), other.getData() + other.getLength(), + getAllocator(this->memory_model_)} {} StateVectorManagedCPU(const ComplexPrecisionT *other_data, size_t other_size, Threading threading = bestThreading(), CPUMemoryModel memory_model = bestCPUMemoryModel()) - : BaseType(Util::log2PerfectPower(other_size), threading, - memory_model) { + : BaseType(Util::log2PerfectPower(other_size), threading, memory_model), + data_{other_data, other_data + other_size, + getAllocator(this->memory_model_)} { PL_ABORT_IF_NOT(Util::isPerfectPowerOf2(other_size), "The size of provided data must be a power of 2."); - - data_ = allocateMemory(memory_model, other_size); - updateData(other_data); } // Clang-tidy gives false positive for delegating constructor @@ -89,24 +79,19 @@ class StateVectorManagedCPU : StateVectorManagedCPU(rhs.data(), 
rhs.size(), threading, memory_model) {} - StateVectorManagedCPU(const StateVectorManagedCPU &rhs) : BaseType(rhs) { - size_t length = BaseType::getLength(); - data_ = allocateMemory(rhs.memory_model_, length); - std::copy(rhs.getData(), rhs.getData() + length, data_.get()); - } - + StateVectorManagedCPU(const StateVectorManagedCPU &rhs) = default; StateVectorManagedCPU(StateVectorManagedCPU &&) noexcept = default; - StateVectorManagedCPU &operator=(const StateVectorManagedCPU &) = delete; + StateVectorManagedCPU &operator=(const StateVectorManagedCPU &) = default; StateVectorManagedCPU & operator=(StateVectorManagedCPU &&) noexcept = default; ~StateVectorManagedCPU() = default; - [[nodiscard]] auto getData() -> ComplexPrecisionT * { return data_.get(); } + [[nodiscard]] auto getData() -> ComplexPrecisionT * { return data_.data(); } [[nodiscard]] auto getData() const -> const ComplexPrecisionT * { - return data_.get(); + return data_.data(); } /** @@ -115,7 +100,7 @@ class StateVectorManagedCPU * @param new_data std::vector contains data. */ void updateData(const ComplexPrecisionT *data) { - std::copy(data, data + BaseType::getLength(), data_.get()); + std::copy(data, data + BaseType::getLength(), data_.data()); } }; } // namespace Pennylane diff --git a/pennylane_lightning/src/tests/TestHelpers.hpp b/pennylane_lightning/src/tests/TestHelpers.hpp index 457d1eebd2..facb6372c7 100644 --- a/pennylane_lightning/src/tests/TestHelpers.hpp +++ b/pennylane_lightning/src/tests/TestHelpers.hpp @@ -150,9 +150,10 @@ isApproxEqual(const Data_t &data1, const Data_t &data2, } template -using TestVector = std::vector< - T, - PLAllocator, TestKernels>>>; +constexpr static auto test_allocator = + AlignedAllocator{Util::common_alignment_v}; + +template using TestVector = std::vector>; /** * @brief Multiplies every value in a dataset by a given complex scalar value. 
@@ -192,7 +193,8 @@ void scaleVector(std::vector, Alloc> &data, template auto createZeroState(size_t num_qubits) -> TestVector> { - TestVector> res(1U << num_qubits, {0.0, 0.0}); + TestVector> res( + 1U << num_qubits, {0.0, 0.0}, test_allocator>); res[0] = std::complex{1.0, 0.0}; return res; } @@ -203,7 +205,8 @@ auto createZeroState(size_t num_qubits) template auto createPlusState(size_t num_qubits) -> TestVector> { - TestVector> res(1U << num_qubits, {1.0, 0.0}); + TestVector> res( + 1U << num_qubits, {1.0, 0.0}, test_allocator>); for (auto &elt : res) { elt /= std::sqrt(1U << num_qubits); } @@ -218,7 +221,8 @@ auto createRandomState(RandomEngine &re, size_t num_qubits) -> TestVector> { using Util::squaredNorm; - TestVector> res(1U << num_qubits, {0.0, 0.0}); + TestVector> res( + 1U << num_qubits, {0.0, 0.0}, test_allocator>); std::uniform_real_distribution dist; for (size_t idx = 0; idx < (1U << num_qubits); idx++) { res[idx] = {dist(re), dist(re)}; @@ -238,7 +242,8 @@ template auto createProductState(std::string_view str) -> TestVector> { using Pennylane::Util::INVSQRT2; - TestVector> st; + TestVector> st( + test_allocator>); st.resize(1U << str.length()); std::vector zero{1.0, 0.0}; diff --git a/pennylane_lightning/src/tests/Test_GateImplementations_Param.cpp b/pennylane_lightning/src/tests/Test_GateImplementations_Param.cpp index 74e6f3a767..530f7916fb 100644 --- a/pennylane_lightning/src/tests/Test_GateImplementations_Param.cpp +++ b/pennylane_lightning/src/tests/Test_GateImplementations_Param.cpp @@ -164,8 +164,9 @@ void testApplyRY() { {0.10575112905629831, -0.47593196040758534}, {-0.8711876098966215, -0.0577721051072477}}}; - const TestVector init_state{{0.8775825618903728, 0.0}, - {0.0, -0.47942553860420306}}; + const TestVector init_state{ + {{0.8775825618903728, 0.0}, {0.0, -0.47942553860420306}}, + test_allocator}; DYNAMIC_SECTION(GateImplementation::name << ", RY - " << PrecisionToName::value) { for (size_t index = 0; index < angles.size(); index++) 
{ @@ -373,15 +374,17 @@ void testApplyIsingXX() { << PrecisionToName::value) { const size_t num_qubits = 3; const auto ini_st = TestVector{ - ComplexPrecisionT{0.125681356503, 0.252712197380}, - ComplexPrecisionT{0.262591068130, 0.370189000494}, - ComplexPrecisionT{0.129300299863, 0.371057794075}, - ComplexPrecisionT{0.392248682814, 0.195795523118}, - ComplexPrecisionT{0.303908059240, 0.082981563244}, - ComplexPrecisionT{0.189140284321, 0.179512645957}, - ComplexPrecisionT{0.173146612336, 0.092249594834}, - ComplexPrecisionT{0.298857179897, 0.269627836165}, - }; + { + ComplexPrecisionT{0.125681356503, 0.252712197380}, + ComplexPrecisionT{0.262591068130, 0.370189000494}, + ComplexPrecisionT{0.129300299863, 0.371057794075}, + ComplexPrecisionT{0.392248682814, 0.195795523118}, + ComplexPrecisionT{0.303908059240, 0.082981563244}, + ComplexPrecisionT{0.189140284321, 0.179512645957}, + ComplexPrecisionT{0.173146612336, 0.092249594834}, + ComplexPrecisionT{0.298857179897, 0.269627836165}, + }, + test_allocator}; const std::vector wires = {0, 2}; const ParamT angle = 0.267030328057308; std::vector expected{ @@ -507,23 +510,23 @@ void testApplyIsingYY() { const size_t num_qubits = 4; const auto ini_st = TestVector{ - ComplexPrecisionT{0.276522701942, 0.192601873155}, - ComplexPrecisionT{0.035951282872, 0.224882549474}, - ComplexPrecisionT{0.142578003191, 0.016769549184}, - ComplexPrecisionT{0.207510965432, 0.068085008177}, - ComplexPrecisionT{0.231177902264, 0.039974505646}, - ComplexPrecisionT{0.038587049391, 0.058503643276}, - ComplexPrecisionT{0.023121176451, 0.294843178966}, - ComplexPrecisionT{0.297936734810, 0.061981734524}, - ComplexPrecisionT{0.140961289031, 0.061129422308}, - ComplexPrecisionT{0.204531438234, 0.159178277448}, - ComplexPrecisionT{0.143828437747, 0.031972463787}, - ComplexPrecisionT{0.291528706380, 0.138875986482}, - ComplexPrecisionT{0.297088897520, 0.179914971203}, - ComplexPrecisionT{0.032991360504, 0.024025500927}, - 
ComplexPrecisionT{0.121553926676, 0.263606060346}, - ComplexPrecisionT{0.177173454285, 0.267447421480}, - }; + {ComplexPrecisionT{0.276522701942, 0.192601873155}, + ComplexPrecisionT{0.035951282872, 0.224882549474}, + ComplexPrecisionT{0.142578003191, 0.016769549184}, + ComplexPrecisionT{0.207510965432, 0.068085008177}, + ComplexPrecisionT{0.231177902264, 0.039974505646}, + ComplexPrecisionT{0.038587049391, 0.058503643276}, + ComplexPrecisionT{0.023121176451, 0.294843178966}, + ComplexPrecisionT{0.297936734810, 0.061981734524}, + ComplexPrecisionT{0.140961289031, 0.061129422308}, + ComplexPrecisionT{0.204531438234, 0.159178277448}, + ComplexPrecisionT{0.143828437747, 0.031972463787}, + ComplexPrecisionT{0.291528706380, 0.138875986482}, + ComplexPrecisionT{0.297088897520, 0.179914971203}, + ComplexPrecisionT{0.032991360504, 0.024025500927}, + ComplexPrecisionT{0.121553926676, 0.263606060346}, + ComplexPrecisionT{0.177173454285, 0.267447421480}}, + test_allocator}; const std::vector wires = {0, 1}; const ParamT angle = 0.312; @@ -661,23 +664,23 @@ void testApplyIsingZZ() { const size_t num_qubits = 4; TestVector ini_st{ - ComplexPrecisionT{0.267462841882, 0.010768564798}, - ComplexPrecisionT{0.228575129706, 0.010564590956}, - ComplexPrecisionT{0.099492749900, 0.260849823392}, - ComplexPrecisionT{0.093690204310, 0.189847108173}, - ComplexPrecisionT{0.033390732374, 0.203836830144}, - ComplexPrecisionT{0.226979395737, 0.081852150975}, - ComplexPrecisionT{0.031235505729, 0.176933497281}, - ComplexPrecisionT{0.294287602843, 0.145156781198}, - ComplexPrecisionT{0.152742706049, 0.111628061129}, - ComplexPrecisionT{0.012553863703, 0.120027860480}, - ComplexPrecisionT{0.237156555364, 0.154658769755}, - ComplexPrecisionT{0.117001120872, 0.228059505033}, - ComplexPrecisionT{0.041495873225, 0.065934827444}, - ComplexPrecisionT{0.089653239407, 0.221581340372}, - ComplexPrecisionT{0.217892322429, 0.291261296999}, - ComplexPrecisionT{0.292993251871, 0.186570798697}, - }; + 
{ComplexPrecisionT{0.267462841882, 0.010768564798}, + ComplexPrecisionT{0.228575129706, 0.010564590956}, + ComplexPrecisionT{0.099492749900, 0.260849823392}, + ComplexPrecisionT{0.093690204310, 0.189847108173}, + ComplexPrecisionT{0.033390732374, 0.203836830144}, + ComplexPrecisionT{0.226979395737, 0.081852150975}, + ComplexPrecisionT{0.031235505729, 0.176933497281}, + ComplexPrecisionT{0.294287602843, 0.145156781198}, + ComplexPrecisionT{0.152742706049, 0.111628061129}, + ComplexPrecisionT{0.012553863703, 0.120027860480}, + ComplexPrecisionT{0.237156555364, 0.154658769755}, + ComplexPrecisionT{0.117001120872, 0.228059505033}, + ComplexPrecisionT{0.041495873225, 0.065934827444}, + ComplexPrecisionT{0.089653239407, 0.221581340372}, + ComplexPrecisionT{0.217892322429, 0.291261296999}, + ComplexPrecisionT{0.292993251871, 0.186570798697}}, + test_allocator}; const std::vector wires = {0, 1}; const ParamT angle = 0.312; diff --git a/pennylane_lightning/src/tests/Test_Internal.cpp b/pennylane_lightning/src/tests/Test_Internal.cpp index 99233b89d6..284bf9a77c 100644 --- a/pennylane_lightning/src/tests/Test_Internal.cpp +++ b/pennylane_lightning/src/tests/Test_Internal.cpp @@ -114,7 +114,7 @@ TEST_CASE("createAllWires", "[Test_Internal]") { {4, 2}, {8, 3}, {12, 1}, {12, 2}, {12, 3}, {12, 4}, {12, 5}, {12, 6}, {12, 7}, {12, 8}, {12, 9}, {12, 10}, {12, 11}, {12, 12}}; - for (const auto& [n, r] : test_pairs) { + for (const auto &[n, r] : test_pairs) { std::vector> vec; auto v = CombinationGenerator(n, r).all_perms(); @@ -139,7 +139,7 @@ TEST_CASE("createAllWires", "[Test_Internal]") { const std::vector> test_pairs{ {4, 2}, {8, 3}, {12, 1}, {12, 2}, {12, 3}, {12, 4}, {12, 5}}; - for (const auto& [n, r] : test_pairs) { + for (const auto &[n, r] : test_pairs) { auto v = PermutationGenerator(n, r).all_perms(); REQUIRE(v.size() == permSize(n, r)); diff --git a/pennylane_lightning/src/util/Memory.hpp b/pennylane_lightning/src/util/Memory.hpp index 0f5e3aa9f7..235a581a34 100644 --- 
a/pennylane_lightning/src/util/Memory.hpp +++ b/pennylane_lightning/src/util/Memory.hpp @@ -10,13 +10,15 @@ // limitations under the License. #pragma once +#include +#include #include #include #include #include #include -#include "ConstantUtil.hpp" +#include "BitUtil.hpp" #include "TypeList.hpp" /* Apple clang does not support std::aligned_alloc in Mac 10.14 */ @@ -24,50 +26,58 @@ namespace Pennylane { /** * @brief Custom aligned allocate function. As appleclang does not support - * std::aligned_alloc in Mac OS 10.14, we use posix memalign + * std::aligned_alloc in Mac OS 10.14, we use posix_memalign function. + * + * Note that alignment must be larger than max_align_t. */ -inline auto alignedAlloc(uint32_t alignment, size_t bytes) -> void* { +inline auto alignedAlloc(uint32_t alignment, size_t bytes) -> void * { #if defined(__clang__) // probably AppleClang - void* p; + void *p; posix_memalign(&p, alignment, bytes); return p; -#elif defined(_MSC_VER) +#elif defined(_MSC_VER) return _aligned_malloc(bytes, alignment); #else return std::aligned_alloc(alignment, bytes); #endif } -inline void alignedFree(void* p) { +inline void alignedFree(void *p) { #if defined(__clang__) - return free(p); -#elif defined(_MSC_VER) + return ::free(p); // NOLINT(hicpp-no-malloc) +#elif defined(_MSC_VER) return _aligned_free(p); #else return std::free(p); #endif } -template struct AlignedAllocator { - static_assert(Util::constIsPerfectPowerOf2(alignment), - "Template parameter alignment must be power of 2."); +template struct AlignedAllocator { + uint32_t alignment_; using value_type = T; - AlignedAllocator() = default; + constexpr explicit AlignedAllocator(uint32_t alignment) + : alignment_{alignment} { + // assert(Util::isPerfectPowerOf2(alignment)); + } - template struct rebind { - using other = AlignedAllocator; - }; + template struct rebind { using other = AlignedAllocator; }; template explicit constexpr AlignedAllocator( - [[maybe_unused]] const AlignedAllocator &rhs) noexcept {} + 
[[maybe_unused]] const AlignedAllocator &rhs) noexcept + : alignment_{rhs.alignment_} {} [[nodiscard]] T *allocate(std::size_t size) { if (size == 0) { return nullptr; } - void *p = alignedAlloc(alignment, sizeof(T) * size); + void *p; + if (alignment_ > alignof(std::max_align_t)) { + p = alignedAlloc(alignment_, sizeof(T) * size); + } else { + p = malloc(sizeof(T) * size); + } if (p == nullptr) { throw std::bad_alloc(); } @@ -86,15 +96,15 @@ template struct AlignedAllocator { } }; -template -bool operator==([[maybe_unused]] const AlignedAllocator &lhs, - [[maybe_unused]] const AlignedAllocator &rhs) { +template +bool operator==([[maybe_unused]] const AlignedAllocator &lhs, + [[maybe_unused]] const AlignedAllocator &rhs) { return true; } template -bool operator!=([[maybe_unused]] const AlignedAllocator &lhs, - [[maybe_unused]] const AlignedAllocator &rhs) { +bool operator!=([[maybe_unused]] const AlignedAllocator &lhs, + [[maybe_unused]] const AlignedAllocator &rhs) { return false; } @@ -117,9 +127,4 @@ template <> struct commonAlignmentHelper { template [[maybe_unused]] constexpr static size_t common_alignment = commonAlignmentHelper::value; - -template -using PLAllocator = std::conditional_t, - AlignedAllocator>; - } // namespace Pennylane From 30c7d42fb5325c9086429c21ebe1f48d23582f82 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Wed, 2 Mar 2022 13:57:13 -0500 Subject: [PATCH 17/94] Clean-up, fix ld warnings for appleclang --- CMakeLists.txt | 2 +- pennylane_lightning/src/bindings/Bindings.hpp | 121 ------------------ pennylane_lightning/src/gates/Constant.hpp | 65 ---------- 3 files changed, 1 insertion(+), 187 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 89f665e31e..5e143fd62d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -42,7 +42,6 @@ option(ENABLE_BLAS "Enable BLAS" OFF) option(BUILD_TESTS "Build cpp tests" OFF) option(BUILD_EXAMPLES "Build cpp examples" OFF) - # Process compile options 
include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/process_options.cmake") @@ -57,6 +56,7 @@ FetchContent_MakeAvailable(pybind11) # All CMakeLists.txt in subdirectories use pennylane_lightning_compile_options and pennylane_lightning_external_libs add_subdirectory(pennylane_lightning/src) +set(CMAKE_CXX_VISIBILITY_PRESET hidden) ##################################################### # Maintain for dependent external package development diff --git a/pennylane_lightning/src/bindings/Bindings.hpp b/pennylane_lightning/src/bindings/Bindings.hpp index 9e2fe54114..9a3a4aed1b 100644 --- a/pennylane_lightning/src/bindings/Bindings.hpp +++ b/pennylane_lightning/src/bindings/Bindings.hpp @@ -163,127 +163,6 @@ void apply(pybind11::array_t> &stateNumpyArray, state.applyOperations(ops, wires, inverse, params); } -/** - * @brief Return a specific lambda function for the given kernel and gate - * operation - * - * We do not expect template parameters kernel and gate_op can be function - * parameters as we want the lambda function to be a stateless. 
- * - * @tparam PrecisionT Floating point precision of underlying statevector data - * @tparam ParamT Floating point type of gate parameters - * @tparam kernel Kernel to register - * @tparam gate_op Gate operation - */ -/* -template -constexpr auto getLambdaForKernelGateOp() { - namespace py = pybind11; - using namespace Pennylane::Gates; - using GateImplementation = SelectKernel; - - static_assert(array_has_elt(GateImplementation::implemented_gates, gate_op), - "The operator to register must be implemented."); - - if constexpr (gate_op != GateOperation::Matrix) { - return - [](StateVectorRawCPU &st, const std::vector -&wires, bool inverse, const std::vector ¶ms) { constexpr auto -func_ptr = GateOpToMemberFuncPtr::value; callGateOps(func_ptr, st.getData(), st.getNumQubits(), wires, - inverse, params); - }; - } else { - return [](StateVectorRawCPU &st, - const py::array_t, - py::array::c_style | py::array::forcecast> - &matrix, - const std::vector &wires, bool inverse = false) { - st.template applyMatrix_( - static_cast *>(matrix.request().ptr), - wires, inverse); - }; - } -}; -*/ -/* -/// @cond DEV -template -constexpr auto getGateOpLambdaPairsIter() { - using Pennylane::Gates::SelectKernel; - if constexpr (gate_idx < SelectKernel::implemented_gates.size()) { - constexpr auto gate_op = - SelectKernel::implemented_gates[gate_idx]; - return prepend_to_tuple( - std::pair{gate_op, getLambdaForKernelGateOp()}, - getGateOpLambdaPairsIter()); - } else { - return std::tuple{}; - } -} -/// @endcond -*/ -/** - * @brief Create a tuple of lambda functions to bind - * - * @tparam PrecisionT Floating point precision of underlying statevector data - * @tparam ParamT Floating point type of gate parameters - * @tparam kernel Kernel to register - */ -/* -template -constexpr auto getGateOpLambdaPairs() { - return getGateOpLambdaPairsIter(); -} -*/ - -/** - * @brief For given kernel, register all implemented gate operations and apply - * matrix. 
- * - * @tparam PrecisionT Floating point precision of underlying statevector data - * @tparam ParamT Floating point type of gate parameters - * @tparam Kernel Kernel to register - * @tparam PyClass Pybind11 class type - */ -/* -template -void registerImplementedGatesForKernel(PyClass &pyclass) { - using namespace Pennylane::Gates; - - auto registerToPyclass = - [&pyclass](auto &&gate_op_lambda_pair) -> GateOperation { - const auto &[gate_op, func] = gate_op_lambda_pair; - if (gate_op == GateOperation::Matrix) { - const std::string name = "applyMatrix_" + kernel_name; - const std::string doc = "Apply a given matrix to wires."; - pyclass.def(name.c_str(), func, doc.c_str()); - } else { - const auto gate_name = - std::string(lookup(Constant::gate_names, gate_op)); - const std::string doc = "Apply the " + gate_name + " gate."; - auto func = [&gate_name](StateVectorManagedCPU& sv, - const std::vector &wires, - bool inverse, - const std::vector ¶ms) { - sv.applyOperation(gate_name, wires, inverse, params); - } - pyclass.def(name.c_str(), , doc.c_str()); - } - return gate_op; - }; - - [[maybe_unused]] const auto registerd_gate_ops = std::apply( - [®isterToPyclass](auto... elt) { - return std::make_tuple(registerToPyclass(elt)...); - }, - gate_op_lambda_pairs); -} -*/ /// @cond DEV template void registerGatesForStateVector(PyClass &pyclass) { diff --git a/pennylane_lightning/src/gates/Constant.hpp b/pennylane_lightning/src/gates/Constant.hpp index 62f0859829..00c061cc43 100644 --- a/pennylane_lightning/src/gates/Constant.hpp +++ b/pennylane_lightning/src/gates/Constant.hpp @@ -205,69 +205,4 @@ namespace Pennylane::Gates::Constant { std::pair{GateOperation::CSWAP, 0}, std::pair{GateOperation::MultiRZ, 1}, }; - -/** - * - * @brief Define which kernel to use for each gate operation. - * - * @rst - * Check - * `this repository - * `_ to see - * the benchmark results for each gate - * @endrst - * - * This value is used for: - * 1. StateVector apply##GATE_NAME methods. 
The kernel function is statically - * binded to the given kernel and cannot be modified. - * 2. Default kernel functions for DynamicDispatcher. The kernel function is - * dynamically binded and can be changed using DynamicDispatcher singleton - * class. - * 3. For the Python binding. - */ -[[maybe_unused]] constexpr std::array default_kernel_for_gates = { - std::pair{GateOperation::PauliX, KernelType::LM}, - std::pair{GateOperation::PauliY, KernelType::LM}, - std::pair{GateOperation::PauliZ, KernelType::LM}, - std::pair{GateOperation::Hadamard, KernelType::PI}, - std::pair{GateOperation::S, KernelType::LM}, - std::pair{GateOperation::T, KernelType::LM}, - std::pair{GateOperation::RX, KernelType::PI}, - std::pair{GateOperation::RY, KernelType::PI}, - std::pair{GateOperation::RZ, KernelType::LM}, - std::pair{GateOperation::PhaseShift, KernelType::LM}, - std::pair{GateOperation::Rot, KernelType::LM}, - std::pair{GateOperation::ControlledPhaseShift, KernelType::PI}, - std::pair{GateOperation::CNOT, KernelType::LM}, - std::pair{GateOperation::CY, KernelType::PI}, - std::pair{GateOperation::CZ, KernelType::LM}, - std::pair{GateOperation::SWAP, KernelType::LM}, - std::pair{GateOperation::IsingXX, KernelType::LM}, - std::pair{GateOperation::IsingYY, KernelType::LM}, - std::pair{GateOperation::IsingZZ, KernelType::LM}, - std::pair{GateOperation::CRX, KernelType::LM}, - std::pair{GateOperation::CRY, KernelType::LM}, - std::pair{GateOperation::CRZ, KernelType::LM}, - std::pair{GateOperation::CRot, KernelType::PI}, - std::pair{GateOperation::Toffoli, KernelType::PI}, - std::pair{GateOperation::CSWAP, KernelType::PI}, - std::pair{GateOperation::MultiRZ, KernelType::LM}, -}; -/** - * @brief Define which kernel to use for each generator operation. 
- */ -[[maybe_unused]] constexpr std::array default_kernel_for_generators = { - std::pair{GeneratorOperation::PhaseShift, KernelType::PI}, - std::pair{GeneratorOperation::RX, KernelType::LM}, - std::pair{GeneratorOperation::RY, KernelType::LM}, - std::pair{GeneratorOperation::RZ, KernelType::LM}, - std::pair{GeneratorOperation::IsingXX, KernelType::LM}, - std::pair{GeneratorOperation::IsingYY, KernelType::LM}, - std::pair{GeneratorOperation::IsingZZ, KernelType::LM}, - std::pair{GeneratorOperation::CRX, KernelType::PI}, - std::pair{GeneratorOperation::CRY, KernelType::PI}, - std::pair{GeneratorOperation::CRZ, KernelType::PI}, - std::pair{GeneratorOperation::ControlledPhaseShift, KernelType::PI}, - std::pair{GeneratorOperation::MultiRZ, KernelType::LM}, -}; } // namespace Pennylane::Gates::Constant From f33876e96879dcf29cbb93f45b3f7cbe4b300e7e Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Wed, 2 Mar 2022 14:19:10 -0500 Subject: [PATCH 18/94] Fix --- .../src/simulator/StateVectorBase.hpp | 123 ------------------ .../src/tests/TestAvailableKernels.hpp | 16 --- .../src/tests/TestConstant.hpp | 21 --- 3 files changed, 160 deletions(-) diff --git a/pennylane_lightning/src/simulator/StateVectorBase.hpp b/pennylane_lightning/src/simulator/StateVectorBase.hpp index 4853754815..1b48512164 100644 --- a/pennylane_lightning/src/simulator/StateVectorBase.hpp +++ b/pennylane_lightning/src/simulator/StateVectorBase.hpp @@ -59,17 +59,6 @@ Gates::SelectKernel::apply##GATE_NAME( \ arr, num_qubits_, wires, inverse, std::forward(args)...); \ } - -#define PENNYLANE_STATEVECTOR_DEFINE_DEFAULT_GATE(GATE_NAME) \ - template \ - inline void apply##GATE_NAME(const std::vector &wires, \ - bool inverse, Ts &&...args) { \ - constexpr auto kernel = \ - Gates::static_lookup( \ - Gates::Constant::default_kernel_for_gates); \ - apply##GATE_NAME##_(wires, inverse, \ - std::forward(args)...); \ - } #define PENNYLANE_STATEVECTOR_DEFINE_GENERATOR(GENERATOR_NAME) \ template \ inline void 
applyGenerator##GENERATOR_NAME##_( \ @@ -464,12 +453,6 @@ template class StateVectorBase { */ PENNYLANE_STATEVECTOR_DEFINE_GATE(PauliX) - /** - * @brief Apply PauliX gate operation using a kernel given in - * default_kernel_for_gates - */ - PENNYLANE_STATEVECTOR_DEFINE_DEFAULT_GATE(PauliX) - /** * @brief Apply PauliY gate operation to given indices of statevector. * @@ -478,12 +461,6 @@ template class StateVectorBase { */ PENNYLANE_STATEVECTOR_DEFINE_GATE(PauliY) - /** - * @brief Apply PauliY gate operation using a kernel given in - * default_kernel_for_gates - */ - PENNYLANE_STATEVECTOR_DEFINE_DEFAULT_GATE(PauliY) - /** * @brief Apply PauliZ gate operation to given indices of statevector. * @@ -491,11 +468,6 @@ template class StateVectorBase { * @param inverse Take adjoint of given operation. */ PENNYLANE_STATEVECTOR_DEFINE_GATE(PauliZ) - /** - * @brief Apply PauliZ gate operation using a kernel given in - * default_kernel_for_gates - */ - PENNYLANE_STATEVECTOR_DEFINE_DEFAULT_GATE(PauliZ) /** * @brief Apply Hadamard gate operation to given indices of statevector. @@ -504,11 +476,6 @@ template class StateVectorBase { * @param inverse Take adjoint of given operation. */ PENNYLANE_STATEVECTOR_DEFINE_GATE(Hadamard) - /** - * @brief Apply Hadamard gate operation using a kernel given in - * default_kernel_for_gates - */ - PENNYLANE_STATEVECTOR_DEFINE_DEFAULT_GATE(Hadamard) /** * @brief Apply S gate operation to given indices of statevector. @@ -517,11 +484,6 @@ template class StateVectorBase { * @param inverse Take adjoint of given operation. */ PENNYLANE_STATEVECTOR_DEFINE_GATE(S) - /** - * @brief Apply S gate operation using a kernel given in - * default_kernel_for_gates - */ - PENNYLANE_STATEVECTOR_DEFINE_DEFAULT_GATE(S) /** * @brief Apply T gate operation to given indices of statevector. @@ -530,11 +492,6 @@ template class StateVectorBase { * @param inverse Take adjoint of given operation. 
*/ PENNYLANE_STATEVECTOR_DEFINE_GATE(T) - /** - * @brief Apply T gate operation using a kernel given in - * default_kernel_for_gates - */ - PENNYLANE_STATEVECTOR_DEFINE_DEFAULT_GATE(T) /** * @brief Apply RX gate operation to given indices of statevector. @@ -544,11 +501,6 @@ template class StateVectorBase { * @param angle Rotation angle of gate. */ PENNYLANE_STATEVECTOR_DEFINE_GATE(RX) - /** - * @brief Apply RX gate operation using a kernel given in - * default_kernel_for_gates - */ - PENNYLANE_STATEVECTOR_DEFINE_DEFAULT_GATE(RX) /** * @brief Apply RY gate operation to given indices of statevector. @@ -558,11 +510,6 @@ template class StateVectorBase { * @param angle Rotation angle of gate. */ PENNYLANE_STATEVECTOR_DEFINE_GATE(RY) - /** - * @brief Apply RY gate operation using a kernel given in - * default_kernel_for_gates - */ - PENNYLANE_STATEVECTOR_DEFINE_DEFAULT_GATE(RY) /** * @brief Apply RZ gate operation to given indices of statevector. @@ -572,11 +519,6 @@ template class StateVectorBase { * @param angle Rotation angle of gate. */ PENNYLANE_STATEVECTOR_DEFINE_GATE(RZ) - /** - * @brief Apply RZ gate operation using a kernel given in - * default_kernel_for_gates - */ - PENNYLANE_STATEVECTOR_DEFINE_DEFAULT_GATE(RZ) /** * @brief Apply phase shift gate operation to given indices of statevector. @@ -586,11 +528,6 @@ template class StateVectorBase { * @param angle Phase shift angle. */ PENNYLANE_STATEVECTOR_DEFINE_GATE(PhaseShift) - /** - * @brief Apply PhaseShift gate operation using a kernel given in - * default_kernel_for_gates - */ - PENNYLANE_STATEVECTOR_DEFINE_DEFAULT_GATE(PhaseShift) /* * @brief Apply Rot gate \f$RZ(\omega)RY(\theta)RZ(\phi)\f$ to given indices @@ -603,11 +540,6 @@ template class StateVectorBase { * @param omega Gate rotation parameter \f$\omega\f$. 
*/ PENNYLANE_STATEVECTOR_DEFINE_GATE(Rot) - /** - * @brief Apply Rot gate operation using a kernel given in - * default_kernel_for_gates - */ - PENNYLANE_STATEVECTOR_DEFINE_DEFAULT_GATE(Rot) /** * @brief Apply controlled phase shift gate operation to given indices of @@ -618,11 +550,6 @@ template class StateVectorBase { * @param angle Phase shift angle. */ PENNYLANE_STATEVECTOR_DEFINE_GATE(ControlledPhaseShift) - /** - * @brief Apply controlled phase shift gate operation using a kernel given - * in default_kernel_for_gates - */ - PENNYLANE_STATEVECTOR_DEFINE_DEFAULT_GATE(ControlledPhaseShift) /** * @brief Apply CNOT (CX) gate to given indices of statevector. @@ -631,11 +558,6 @@ template class StateVectorBase { * @param inverse Take adjoint of given operation. */ PENNYLANE_STATEVECTOR_DEFINE_GATE(CNOT) - /** - * @brief Apply CNOT gate operation using a kernel given in - * default_kernel_for_gates - */ - PENNYLANE_STATEVECTOR_DEFINE_DEFAULT_GATE(CNOT) /** * @brief Apply CY gate to given indices of statevector. @@ -644,11 +566,6 @@ template class StateVectorBase { * @param inverse Take adjoint of given operation. */ PENNYLANE_STATEVECTOR_DEFINE_GATE(CY) - /** - * @brief Apply CY gate operation using a kernel given in - * default_kernel_for_gates - */ - PENNYLANE_STATEVECTOR_DEFINE_DEFAULT_GATE(CY) /** * @brief Apply CZ gate to given indices of statevector. @@ -657,11 +574,6 @@ template class StateVectorBase { * @param inverse Take adjoint of given operation. */ PENNYLANE_STATEVECTOR_DEFINE_GATE(CZ) - /** - * @brief Apply CZ gate operation using a kernel given in - * default_kernel_for_gates - */ - PENNYLANE_STATEVECTOR_DEFINE_DEFAULT_GATE(CZ) /** * @brief Apply SWAP gate to given indices of statevector. @@ -670,11 +582,6 @@ template class StateVectorBase { * @param inverse Take adjoint of given operation. 
*/ PENNYLANE_STATEVECTOR_DEFINE_GATE(SWAP) - /** - * @brief Apply SWAP gate operation using a kernel given in - * default_kernel_for_gates - */ - PENNYLANE_STATEVECTOR_DEFINE_DEFAULT_GATE(SWAP) /** * @brief Apply CRX gate to given indices of statevector. @@ -684,11 +591,6 @@ template class StateVectorBase { * @param angle Rotation angle of gate. */ PENNYLANE_STATEVECTOR_DEFINE_GATE(CRX) - /** - * @brief Apply CRX gate operation using a kernel given in - * default_kernel_for_gates - */ - PENNYLANE_STATEVECTOR_DEFINE_DEFAULT_GATE(CRX) /** * @brief Apply CRY gate to given indices of statevector. @@ -698,11 +600,6 @@ template class StateVectorBase { * @param angle Rotation angle of gate. */ PENNYLANE_STATEVECTOR_DEFINE_GATE(CRY) - /** - * @brief Apply CRY gate operation using a kernel given in - * default_kernel_for_gates - */ - PENNYLANE_STATEVECTOR_DEFINE_DEFAULT_GATE(CRY) /** * @brief Apply CRZ gate to given indices of statevector. @@ -712,11 +609,6 @@ template class StateVectorBase { * @param angle Rotation angle of gate. */ PENNYLANE_STATEVECTOR_DEFINE_GATE(CRZ) - /** - * @brief Apply CRZ gate operation using a kernel given in - * default_kernel_for_gates - */ - PENNYLANE_STATEVECTOR_DEFINE_DEFAULT_GATE(CRZ) /** * @brief Apply CRot gate (controlled \f$RZ(\omega)RY(\theta)RZ(\phi)\f$) to @@ -729,11 +621,6 @@ template class StateVectorBase { * @param omega Gate rotation parameter \f$\omega\f$. */ PENNYLANE_STATEVECTOR_DEFINE_GATE(CRot) - /** - * @brief Apply CRot gate operation using a kernel given in - * default_kernel_for_gates - */ - PENNYLANE_STATEVECTOR_DEFINE_DEFAULT_GATE(CRot) /** * @brief Apply Toffoli (CCX) gate to given indices of statevector. @@ -742,11 +629,6 @@ template class StateVectorBase { * @param inverse Take adjoint of given operation. 
*/ PENNYLANE_STATEVECTOR_DEFINE_GATE(Toffoli) - /** - * @brief Apply Toffoli gate operation using a kernel given in - * default_kernel_for_gates - */ - PENNYLANE_STATEVECTOR_DEFINE_DEFAULT_GATE(Toffoli) /** * @brief Apply CSWAP gate to given indices of statevector. @@ -755,11 +637,6 @@ template class StateVectorBase { * @param inverse Take adjoint of given operation. */ PENNYLANE_STATEVECTOR_DEFINE_GATE(CSWAP) - /** - * @brief Apply CSWAP gate operation using a kernel given in - * default_kernel_for_gates - */ - PENNYLANE_STATEVECTOR_DEFINE_DEFAULT_GATE(CSWAP) }; /** diff --git a/pennylane_lightning/src/tests/TestAvailableKernels.hpp b/pennylane_lightning/src/tests/TestAvailableKernels.hpp index 4170856b96..669d98ddc8 100644 --- a/pennylane_lightning/src/tests/TestAvailableKernels.hpp +++ b/pennylane_lightning/src/tests/TestAvailableKernels.hpp @@ -68,20 +68,4 @@ static_assert(Util::count_unique(Util::first_elts_of(kernel_id_name_pairs)) == static_assert(Util::count_unique(Util::second_elts_of(kernel_id_name_pairs)) == Util::length(), "Kernel names must be distinct."); - -/******************************************************************************* - * Check all kernels in default_kernel_for_gates are available - ******************************************************************************/ - -static_assert(check_kernels_are_available( - Util::second_elts_of(Constant::default_kernel_for_gates)), - "default_kernel_for_gates contains an unavailable kernel"); - -/******************************************************************************* - * Check all kernels in default_kernel_for_generators are available - ******************************************************************************/ - -static_assert(check_kernels_are_available(Util::second_elts_of( - Constant::default_kernel_for_generators)), - "default_kernel_for_gates contains an unavailable kernel"); } // namespace Pennylane::Gates diff --git a/pennylane_lightning/src/tests/TestConstant.hpp 
b/pennylane_lightning/src/tests/TestConstant.hpp index 4d20e25af2..8231fb1a97 100644 --- a/pennylane_lightning/src/tests/TestConstant.hpp +++ b/pennylane_lightning/src/tests/TestConstant.hpp @@ -97,25 +97,4 @@ static_assert( Util::count_unique(Util::first_elts_of(Constant::generator_wires)) == Constant::generator_wires.size(), "First elements of generator_wires must be distinct."); - -/******************************************************************************* - * Check default_kernel_for_gates are defined for all gates - ******************************************************************************/ - -static_assert( - Util::count_unique( - Util::first_elts_of(Constant::default_kernel_for_gates)) == - static_cast(GateOperation::END), - "Constant default_kernel_for_gates must be defined for all gates."); - -/******************************************************************************* - * Check default_kernel_for_generators are defined for all generators - ******************************************************************************/ - -static_assert(Util::count_unique(Util::first_elts_of( - Constant::default_kernel_for_generators)) == - static_cast(GeneratorOperation::END), - "Constant default_kernel_for_generators must be defined for all " - "generators."); - } // namespace Pennylane::Gates From af6387b262c6cee53ad8b4d5312d75623e1cc08e Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Wed, 2 Mar 2022 22:53:05 +0000 Subject: [PATCH 19/94] Fix memory allocate mechanism; Fix some tests for MSVX --- pennylane_lightning/src/bindings/Bindings.hpp | 41 +++++++----------- .../cpu_kernels/GateImplementationsLM.hpp | 4 +- .../src/tests/CreateAllWires.cpp | 2 +- pennylane_lightning/src/tests/TestHelpers.hpp | 8 ++-- .../src/tests/Test_AdjDiff.cpp | 6 ++- .../src/tests/Test_DynamicDispatcher.cpp | 43 ++++++++----------- ...est_GateImplementations_CompareKernels.cpp | 12 +++--- .../Test_GateImplementations_Generator.cpp | 6 +-- 
.../Test_GateImplementations_Inverse.cpp | 2 +- pennylane_lightning/src/util/Memory.hpp | 6 ++- 10 files changed, 60 insertions(+), 70 deletions(-) diff --git a/pennylane_lightning/src/bindings/Bindings.hpp b/pennylane_lightning/src/bindings/Bindings.hpp index 9a3a4aed1b..039d516dde 100644 --- a/pennylane_lightning/src/bindings/Bindings.hpp +++ b/pennylane_lightning/src/bindings/Bindings.hpp @@ -97,7 +97,18 @@ auto getNumpyArrayAlignment(const pybind11::array &numpyArray) return getMemoryModel(numpyArray.request().ptr); } -void deallocateArray(void *ptr) { alignedFree(ptr); } +template +auto alignedNumpyArray(CPUMemoryModel memory_model, size_t size) -> pybind11::array { + if (getAlignment(memory_model) > alignof(std::max_align_t)) { + void* ptr = alignedAlloc(getAlignment(memory_model), + sizeof(T) * size); + auto capsule = pybind11::capsule(ptr, &alignedFree); + return pybind11::array{pybind11::dtype::of(), {size}, {sizeof(T)}, ptr, capsule}; + } // else + void* ptr = malloc(sizeof(T) * size); + auto capsule = pybind11::capsule(ptr, free); + return pybind11::array{ pybind11::dtype::of(), {size}, {sizeof(T)}, ptr, capsule }; +} /** * @brief We return an numpy array whose underlying data is allocated by @@ -110,33 +121,13 @@ auto allocateAlignedArray(size_t size, pybind11::dtype dt) -> pybind11::array { auto memory_model = bestCPUMemoryModel(); if (dt.is(pybind11::dtype::of())) { - void *ptr = alignedAlloc(getAlignment(memory_model), - sizeof(float) * size); - auto capsule = pybind11::capsule(ptr, &deallocateArray); - - return pybind11::array{dt, {size}, {sizeof(float)}, ptr, capsule}; + return alignedNumpyArray(memory_model, size); } else if (dt.is(pybind11::dtype::of())) { - void *ptr = alignedAlloc(getAlignment(memory_model), - sizeof(double) * size); - auto capsule = pybind11::capsule(ptr, &deallocateArray); - - return pybind11::array{dt, {size}, {sizeof(double)}, ptr, capsule}; + return alignedNumpyArray(memory_model, size); } else if 
(dt.is(pybind11::dtype::of>())) { - void *ptr = - alignedAlloc(getAlignment>(memory_model), - sizeof(std::complex) * size); - auto capsule = pybind11::capsule(ptr, &deallocateArray); - - return pybind11::array{ - dt, {size}, {sizeof(std::complex)}, ptr, capsule}; + return alignedNumpyArray>(memory_model, size); } else if (dt.is(pybind11::dtype::of>())) { - void *ptr = - alignedAlloc(getAlignment>(memory_model), - sizeof(std::complex) * size); - auto capsule = pybind11::capsule(ptr, &deallocateArray); - - return pybind11::array{ - dt, {size}, {sizeof(std::complex)}, ptr, capsule}; + return alignedNumpyArray>(memory_model, size); } else { throw pybind11::type_error("Unsupported datatype."); } diff --git a/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsLM.hpp b/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsLM.hpp index 651731f8d2..04a0df9ab1 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsLM.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsLM.hpp @@ -253,7 +253,7 @@ class GateImplementationsLM : public PauliGenerator { const std::vector &wires, bool inverse) { assert(num_qubits >= wires.size()); - size_t dim = 1U << wires.size(); + size_t dim = static_cast(1U) << wires.size(); std::vector indices; indices.resize(dim); std::vector> coeffs_in(dim, 0.0); @@ -1324,7 +1324,7 @@ class GateImplementationsLM : public PauliGenerator { } for (size_t k = 0; k < Util::exp2(num_qubits); k++) { - arr[k] *= (2 * int(Util::popcount(k & wires_parity) % 2) - 1); + arr[k] *= static_cast(2 * int(Util::popcount(k & wires_parity) % 2) - 1); } // NOLINTNEXTLINE(readability-magic-numbers) return static_cast(0.5); diff --git a/pennylane_lightning/src/tests/CreateAllWires.cpp b/pennylane_lightning/src/tests/CreateAllWires.cpp index ecea28089c..dd0194a625 100644 --- a/pennylane_lightning/src/tests/CreateAllWires.cpp +++ b/pennylane_lightning/src/tests/CreateAllWires.cpp @@ -7,7 +7,7 @@ auto crateAllWires(size_t 
n_qubits, Gates::GateOperation gate_op, bool order) std::vector> res; res.reserve((1U << n_qubits) - 1); ; - for (size_t k = 1; k < (1U << n_qubits); k++) { + for (size_t k = 1; k < (static_cast(1U) << n_qubits); k++) { std::vector wires; wires.reserve(Util::popcount(k)); diff --git a/pennylane_lightning/src/tests/TestHelpers.hpp b/pennylane_lightning/src/tests/TestHelpers.hpp index facb6372c7..5bc840a09e 100644 --- a/pennylane_lightning/src/tests/TestHelpers.hpp +++ b/pennylane_lightning/src/tests/TestHelpers.hpp @@ -222,9 +222,9 @@ auto createRandomState(RandomEngine &re, size_t num_qubits) using Util::squaredNorm; TestVector> res( - 1U << num_qubits, {0.0, 0.0}, test_allocator>); + static_cast(1U) << num_qubits, {0.0, 0.0}, test_allocator>); std::uniform_real_distribution dist; - for (size_t idx = 0; idx < (1U << num_qubits); idx++) { + for (size_t idx = 0; idx < (static_cast(1U) << num_qubits); idx++) { res[idx] = {dist(re), dist(re)}; } @@ -308,9 +308,9 @@ auto createParams(Gates::GateOperation op) -> std::vector { case 0: return {}; case 1: - return {0.312}; + return {static_cast(0.312)}; case 3: - return {0.128, -0.563, 1.414}; + return {static_cast(0.128), static_cast(-0.563), static_cast(1.414)}; default: PL_ABORT("The number of parameters for a given gate is unknown."); } diff --git a/pennylane_lightning/src/tests/Test_AdjDiff.cpp b/pennylane_lightning/src/tests/Test_AdjDiff.cpp index d1f9e94136..92ca6d0cc1 100644 --- a/pennylane_lightning/src/tests/Test_AdjDiff.cpp +++ b/pennylane_lightning/src/tests/Test_AdjDiff.cpp @@ -1,5 +1,3 @@ -#define _USE_MATH_DEFINES - #include #include #include @@ -18,6 +16,10 @@ #include "TestHelpers.hpp" +#if !defined(_USE_MATH_DEFINES) +#define _USE_MATH_DEFINES +#endif + using namespace Pennylane; using namespace Pennylane::Algorithms; diff --git a/pennylane_lightning/src/tests/Test_DynamicDispatcher.cpp b/pennylane_lightning/src/tests/Test_DynamicDispatcher.cpp index 3511a12da9..ddca995be0 100644 --- 
a/pennylane_lightning/src/tests/Test_DynamicDispatcher.cpp +++ b/pennylane_lightning/src/tests/Test_DynamicDispatcher.cpp @@ -28,14 +28,23 @@ using Pennylane::Gates::callGateOps; * We just check DynamicDispacther calls the correct functuion by comparing * the result from it with that of the direct call. */ -template + +template + struct testDispatchForKernel { + static void test(RandomEngine& re, size_t num_qubits) { + // Keep source, but allow clang-tidy to pass for unused + static_cast(re); + static_cast(num_qubits); + } // Do nothing if not implemented; + // This could probably be replaced with an enable_if or SFINAE-like + // pattern. +}; +template > struct testDispatchForKernel { - template < - GateOperation gate_op, class RandomEngine, - std::enable_if_t< - Util::array_has_elt(GateImplementation::implemented_gates, gate_op), - bool> = true> - static void test(RandomEngine &re, size_t num_qubits) { + static void test(RandomEngine& re, size_t num_qubits) { const auto ini_st = createRandomState(re, num_qubits); auto expected = ini_st; @@ -46,9 +55,9 @@ struct testDispatchForKernel { // in the GateImplementation auto gate_func = GateOpToMemberFuncPtr::value; + gate_op>::value; callGateOps(gate_func, expected.data(), num_qubits, wires, false, - params); + params); // and compare it to the dynamic dispatcher auto test_st = ini_st; @@ -59,19 +68,6 @@ struct testDispatchForKernel { gate_name, wires, false, params); REQUIRE(test_st == expected); } - - template < - GateOperation gate_op, class RandomEngine, - std::enable_if_t = true> - static void test(RandomEngine &re, size_t num_qubits) { - // Keep source, but allow clang-tidy to pass for unused - static_cast(re); - static_cast(num_qubits); - } // Do nothing if not implemented; - // This could probably be replaced with an enable_if or SFINAE-like - // pattern. 
}; template :: - template test(re, num_qubits); + testDispatchForKernel::test(re, num_qubits); } testAllGatesForKernelIter std::string kernelsToString() { if constexpr (!std::is_same_v) { - if constexpr (!std::is_same_v) { - return std::string(TypeList::Type::name) + ", " + - kernelsToString(); - } - return std::string(TypeList::Type::name); + return std::string(TypeList::Type::name) + ", " + + kernelsToString(); } + return std::string(""); } /* Type transformation */ @@ -128,7 +126,7 @@ void testApplyGate(RandomEngine &re, size_t num_qubits) { std::make_index_sequence()>())); for (size_t i = 0; i < results.size() - 1; i++) { - REQUIRE(results[i] == PLApprox(results[i + 1]).margin(1e-7)); + REQUIRE(results[i] == PLApprox(results[i + 1]).margin(static_cast(1e-5))); } } @@ -142,7 +140,7 @@ void testApplyGate(RandomEngine &re, size_t num_qubits) { std::make_index_sequence()>())); for (size_t i = 0; i < results.size() - 1; i++) { - REQUIRE(results[i] == PLApprox(results[i + 1]).margin(1e-7)); + REQUIRE(results[i] == PLApprox(results[i + 1]).margin(static_cast(1e-5))); } } } diff --git a/pennylane_lightning/src/tests/Test_GateImplementations_Generator.cpp b/pennylane_lightning/src/tests/Test_GateImplementations_Generator.cpp index 4b80e5235b..b22221ec63 100644 --- a/pennylane_lightning/src/tests/Test_GateImplementations_Generator.cpp +++ b/pennylane_lightning/src/tests/Test_GateImplementations_Generator.cpp @@ -74,7 +74,7 @@ void testGeneratorForGate(RandomEngine &re, size_t num_qubits) { using ComplexPrecisionT = std::complex; constexpr auto I = Util::IMAG(); - constexpr ParamT eps = 1e-4; // For finite difference + constexpr ParamT eps = static_cast(1e-4); // For finite difference constexpr auto gate_op = static_lookup(generator_gate_pairs); constexpr auto gate_name = static_lookup(Constant::gate_names); @@ -105,7 +105,7 @@ void testGeneratorForGate(RandomEngine &re, size_t num_qubits) { gate_func(diff_st_1.data(), num_qubits, wires, false, eps); 
gate_func(diff_st_2.data(), num_qubits, wires, false, -eps); - std::vector gate_der_st(1U << num_qubits); + std::vector gate_der_st(static_cast(1U) << num_qubits); std::transform( diff_st_1.cbegin(), diff_st_1.cend(), diff_st_2.cbegin(), @@ -114,7 +114,7 @@ void testGeneratorForGate(RandomEngine &re, size_t num_qubits) { scaleVector(gate_der_st, static_cast(0.5) / eps); - REQUIRE(gntr_st == PLApprox(gate_der_st).margin(1e-3)); + REQUIRE(gntr_st == PLApprox(gate_der_st).margin(static_cast(1e-4))); } } template (1e-7))); } } diff --git a/pennylane_lightning/src/util/Memory.hpp b/pennylane_lightning/src/util/Memory.hpp index 235a581a34..c41a6004e2 100644 --- a/pennylane_lightning/src/util/Memory.hpp +++ b/pennylane_lightning/src/util/Memory.hpp @@ -85,7 +85,11 @@ template struct AlignedAllocator { } void deallocate(T *p, [[maybe_unused]] std::size_t size) noexcept { - alignedFree(p); + if (alignment_ > alignof(std::max_align_t)) { + alignedFree(p); + } else { + free(p); + } } template void construct(U *ptr) { ::new ((void *)ptr) U(); } From 5adac4122c0c0004732de38c1d753765cd9889f4 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Wed, 2 Mar 2022 18:57:20 -0500 Subject: [PATCH 20/94] Fix a UB --- .../src/algorithms/AdjointDiff.hpp | 12 +++++----- pennylane_lightning/src/bindings/Bindings.hpp | 15 +++++++----- .../src/examples/run_benchmark.py | 1 + .../cpu_kernels/GateImplementationsLM.hpp | 3 ++- pennylane_lightning/src/tests/TestHelpers.hpp | 6 +++-- .../src/tests/Test_DynamicDispatcher.cpp | 23 +++++++++++-------- ...est_GateImplementations_CompareKernels.cpp | 10 +++++--- .../Test_GateImplementations_Generator.cpp | 17 ++++++++++++-- .../src/util/LinearAlgebra.hpp | 12 +++++----- pennylane_lightning/src/util/Memory.hpp | 4 ++-- 10 files changed, 65 insertions(+), 38 deletions(-) mode change 100644 => 100755 pennylane_lightning/src/examples/run_benchmark.py diff --git a/pennylane_lightning/src/algorithms/AdjointDiff.hpp 
b/pennylane_lightning/src/algorithms/AdjointDiff.hpp index c717681b40..b27e85f28c 100644 --- a/pennylane_lightning/src/algorithms/AdjointDiff.hpp +++ b/pennylane_lightning/src/algorithms/AdjointDiff.hpp @@ -344,8 +344,8 @@ template class AdjointJacobian { applyOperations(lambda, ops); } - const auto tp_begin = tp.begin(); - auto tp_it = tp.end(); + const auto tp_rend = tp.rend(); + auto tp_it = tp.rbegin(); StateVectorManagedCPU sv{lambda.getNumQubits(), Threading::SingleThread}; @@ -368,9 +368,9 @@ template class AdjointJacobian { applyOperationAdj(lambda, ops, op_idx); if (ops.hasParams(op_idx)) { - if ((current_param_idx == *(std::prev(tp_it))) || - std::find(tp_begin, tp_it, current_param_idx) != - tp_it) { + if ((current_param_idx == *tp_it) || + std::find(tp_it, tp_rend, current_param_idx) != + tp_rend) { const T scalingFactor = applyGenerator(mu, ops_name[op_idx], ops.getOpsWires()[op_idx], @@ -399,7 +399,7 @@ template class AdjointJacobian { mu.getData(), mu.getLength())); } trainableParamNumber--; - std::advance(tp_it, -1); + ++tp_it; } current_param_idx--; } diff --git a/pennylane_lightning/src/bindings/Bindings.hpp b/pennylane_lightning/src/bindings/Bindings.hpp index 039d516dde..c0e20f5552 100644 --- a/pennylane_lightning/src/bindings/Bindings.hpp +++ b/pennylane_lightning/src/bindings/Bindings.hpp @@ -98,16 +98,19 @@ auto getNumpyArrayAlignment(const pybind11::array &numpyArray) } template -auto alignedNumpyArray(CPUMemoryModel memory_model, size_t size) -> pybind11::array { +auto alignedNumpyArray(CPUMemoryModel memory_model, size_t size) + -> pybind11::array { if (getAlignment(memory_model) > alignof(std::max_align_t)) { - void* ptr = alignedAlloc(getAlignment(memory_model), - sizeof(T) * size); + void *ptr = + alignedAlloc(getAlignment(memory_model), sizeof(T) * size); auto capsule = pybind11::capsule(ptr, &alignedFree); - return pybind11::array{pybind11::dtype::of(), {size}, {sizeof(T)}, ptr, capsule}; + return pybind11::array{ + 
pybind11::dtype::of(), {size}, {sizeof(T)}, ptr, capsule}; } // else - void* ptr = malloc(sizeof(T) * size); + void *ptr = malloc(sizeof(T) * size); auto capsule = pybind11::capsule(ptr, free); - return pybind11::array{ pybind11::dtype::of(), {size}, {sizeof(T)}, ptr, capsule }; + return pybind11::array{ + pybind11::dtype::of(), {size}, {sizeof(T)}, ptr, capsule}; } /** diff --git a/pennylane_lightning/src/examples/run_benchmark.py b/pennylane_lightning/src/examples/run_benchmark.py old mode 100644 new mode 100755 index f2a770d2c8..ae20d520b0 --- a/pennylane_lightning/src/examples/run_benchmark.py +++ b/pennylane_lightning/src/examples/run_benchmark.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python3 import subprocess import argparse import json diff --git a/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsLM.hpp b/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsLM.hpp index 04a0df9ab1..87fb7469a8 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsLM.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsLM.hpp @@ -1324,7 +1324,8 @@ class GateImplementationsLM : public PauliGenerator { } for (size_t k = 0; k < Util::exp2(num_qubits); k++) { - arr[k] *= static_cast(2 * int(Util::popcount(k & wires_parity) % 2) - 1); + arr[k] *= static_cast( + 2 * int(Util::popcount(k & wires_parity) % 2) - 1); } // NOLINTNEXTLINE(readability-magic-numbers) return static_cast(0.5); diff --git a/pennylane_lightning/src/tests/TestHelpers.hpp b/pennylane_lightning/src/tests/TestHelpers.hpp index 5bc840a09e..ab383909d4 100644 --- a/pennylane_lightning/src/tests/TestHelpers.hpp +++ b/pennylane_lightning/src/tests/TestHelpers.hpp @@ -222,7 +222,8 @@ auto createRandomState(RandomEngine &re, size_t num_qubits) using Util::squaredNorm; TestVector> res( - static_cast(1U) << num_qubits, {0.0, 0.0}, test_allocator>); + static_cast(1U) << num_qubits, {0.0, 0.0}, + test_allocator>); std::uniform_real_distribution dist; for (size_t idx = 0; idx 
< (static_cast(1U) << num_qubits); idx++) { res[idx] = {dist(re), dist(re)}; @@ -310,7 +311,8 @@ auto createParams(Gates::GateOperation op) -> std::vector { case 1: return {static_cast(0.312)}; case 3: - return {static_cast(0.128), static_cast(-0.563), static_cast(1.414)}; + return {static_cast(0.128), static_cast(-0.563), + static_cast(1.414)}; default: PL_ABORT("The number of parameters for a given gate is unknown."); } diff --git a/pennylane_lightning/src/tests/Test_DynamicDispatcher.cpp b/pennylane_lightning/src/tests/Test_DynamicDispatcher.cpp index ddca995be0..49e839ea25 100644 --- a/pennylane_lightning/src/tests/Test_DynamicDispatcher.cpp +++ b/pennylane_lightning/src/tests/Test_DynamicDispatcher.cpp @@ -30,9 +30,9 @@ using Pennylane::Gates::callGateOps; */ template - struct testDispatchForKernel { - static void test(RandomEngine& re, size_t num_qubits) { + GateOperation gate_op, class RandomEngine, class Enable = void> +struct testDispatchForKernel { + static void test(RandomEngine &re, size_t num_qubits) { // Keep source, but allow clang-tidy to pass for unused static_cast(re); static_cast(num_qubits); @@ -41,10 +41,12 @@ template > -struct testDispatchForKernel { - static void test(RandomEngine& re, size_t num_qubits) { + GateOperation gate_op, class RandomEngine> +struct testDispatchForKernel< + PrecisionT, ParamT, GateImplementation, gate_op, RandomEngine, + std::enable_if_t> { + static void test(RandomEngine &re, size_t num_qubits) { const auto ini_st = createRandomState(re, num_qubits); auto expected = ini_st; @@ -55,9 +57,9 @@ struct testDispatchForKernel { // in the GateImplementation auto gate_func = GateOpToMemberFuncPtr::value; + gate_op>::value; callGateOps(gate_func, expected.data(), num_qubits, wires, false, - params); + params); // and compare it to the dynamic dispatcher auto test_st = ini_st; @@ -79,7 +81,8 @@ constexpr void testAllGatesForKernelIter(RandomEngine &re, for (size_t num_qubits = 3; num_qubits <= max_num_qubits; num_qubits++) { 
- testDispatchForKernel::test(re, num_qubits); + testDispatchForKernel::test(re, num_qubits); } testAllGatesForKernelIter std::string kernelsToString() { if constexpr (!std::is_same_v) { return std::string(TypeList::Type::name) + ", " + - kernelsToString(); + kernelsToString(); } return std::string(""); } @@ -126,7 +126,9 @@ void testApplyGate(RandomEngine &re, size_t num_qubits) { std::make_index_sequence()>())); for (size_t i = 0; i < results.size() - 1; i++) { - REQUIRE(results[i] == PLApprox(results[i + 1]).margin(static_cast(1e-5))); + REQUIRE(results[i] == + PLApprox(results[i + 1]) + .margin(static_cast(1e-5))); } } @@ -140,7 +142,9 @@ void testApplyGate(RandomEngine &re, size_t num_qubits) { std::make_index_sequence()>())); for (size_t i = 0; i < results.size() - 1; i++) { - REQUIRE(results[i] == PLApprox(results[i + 1]).margin(static_cast(1e-5))); + REQUIRE(results[i] == + PLApprox(results[i + 1]) + .margin(static_cast(1e-5))); } } } diff --git a/pennylane_lightning/src/tests/Test_GateImplementations_Generator.cpp b/pennylane_lightning/src/tests/Test_GateImplementations_Generator.cpp index b22221ec63..0dfc1eceb8 100644 --- a/pennylane_lightning/src/tests/Test_GateImplementations_Generator.cpp +++ b/pennylane_lightning/src/tests/Test_GateImplementations_Generator.cpp @@ -35,6 +35,17 @@ constexpr std::string_view remove_prefix(const std::string_view &str, return {str.data() + len, str.length() - len}; } +template constexpr auto testMargin() -> T { + static_assert(std::is_same_v || std::is_same_v); + if constexpr (std::is_same_v) { + return 1e-3F; + } else { + return 1e-5L; + } +} + +template constexpr static auto test_margin = testMargin(); + template constexpr auto findGateOpForGenerator() -> GateOperation { constexpr auto gntr_name = @@ -105,7 +116,8 @@ void testGeneratorForGate(RandomEngine &re, size_t num_qubits) { gate_func(diff_st_1.data(), num_qubits, wires, false, eps); gate_func(diff_st_2.data(), num_qubits, wires, false, -eps); - std::vector 
gate_der_st(static_cast(1U) << num_qubits); + std::vector gate_der_st(static_cast(1U) + << num_qubits); std::transform( diff_st_1.cbegin(), diff_st_1.cend(), diff_st_2.cbegin(), @@ -114,7 +126,8 @@ void testGeneratorForGate(RandomEngine &re, size_t num_qubits) { scaleVector(gate_der_st, static_cast(0.5) / eps); - REQUIRE(gntr_st == PLApprox(gate_der_st).margin(static_cast(1e-4))); + REQUIRE(gntr_st == + PLApprox(gate_der_st).margin(test_margin)); } } template *mat, * @param n Number of columns of `mat`. * @return mat transpose of shape n * m. */ -template -inline auto Transpose(const std::vector> &mat, size_t m, - size_t n) -> std::vector> { +template +inline auto Transpose(const std::vector, Alloc> &mat, size_t m, + size_t n) -> std::vector, Alloc> { if (mat.size() != m * n) { throw std::invalid_argument( "Invalid number of rows and columns for the input matrix"); } - std::vector> mat_t(n * m); + std::vector, Alloc> mat_t(n * m, mat.get_allocator()); CFTranspose(mat.data(), mat_t.data(), m, n, 0, m, 0, n); return mat_t; } @@ -494,7 +494,7 @@ inline auto Transpose(const std::vector &mat, size_t m, size_t n) "Invalid number of rows and columns for the input matrix"); } - std::vector mat_t(n * m); + std::vector mat_t(n * m, mat.get_allocator()); CFTranspose(mat.data(), mat_t.data(), m, n, 0, m, 0, n); return mat_t; } @@ -562,7 +562,7 @@ inline auto vecMatrixProd(const std::vector &v_in, "Invalid number of rows and columns for the input matrix"); } - std::vector v_out(n); + std::vector v_out(n, mat.get_allocator()); vecMatrixProd(v_in.data(), mat.data(), v_out.data(), m, n); return v_out; diff --git a/pennylane_lightning/src/util/Memory.hpp b/pennylane_lightning/src/util/Memory.hpp index c41a6004e2..ea2c20ec9c 100644 --- a/pennylane_lightning/src/util/Memory.hpp +++ b/pennylane_lightning/src/util/Memory.hpp @@ -103,13 +103,13 @@ template struct AlignedAllocator { template bool operator==([[maybe_unused]] const AlignedAllocator &lhs, [[maybe_unused]] const 
AlignedAllocator &rhs) { - return true; + return lhs.alignment_ == rhs.alignment_; } template bool operator!=([[maybe_unused]] const AlignedAllocator &lhs, [[maybe_unused]] const AlignedAllocator &rhs) { - return false; + return lhs.alignment_ != rhs.alignment_; } /** From 345c2261adbfd356c50038fbce6dfd408b508245 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Thu, 3 Mar 2022 00:13:26 +0000 Subject: [PATCH 21/94] Fix some bugs --- .../src/algorithms/AdjointDiff.hpp | 3 +++ tests/test_measures.py | 16 +++++++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/pennylane_lightning/src/algorithms/AdjointDiff.hpp b/pennylane_lightning/src/algorithms/AdjointDiff.hpp index b27e85f28c..fc02f3c50e 100644 --- a/pennylane_lightning/src/algorithms/AdjointDiff.hpp +++ b/pennylane_lightning/src/algorithms/AdjointDiff.hpp @@ -368,6 +368,9 @@ template class AdjointJacobian { applyOperationAdj(lambda, ops, op_idx); if (ops.hasParams(op_idx)) { + if (tp_it == tp.rend()) { + break; + } if ((current_param_idx == *tp_it) || std::find(tp_it, tp_rend, current_param_idx) != tp_rend) { diff --git a/tests/test_measures.py b/tests/test_measures.py index a843253faa..0b8bac88e5 100644 --- a/tests/test_measures.py +++ b/tests/test_measures.py @@ -16,6 +16,7 @@ """ import numpy as np import pennylane as qml +import math from pennylane.measurements import ( Variance, Expectation, @@ -55,15 +56,18 @@ def dev(self): def test_probs_dtype64(self, dev): """Test if probs changes the state dtype""" - dev._state = np.array([1, 0]).astype(np.complex64) + dev._state = dev._asarray(np.array([1/math.sqrt(2), 1/math.sqrt(2), 0, 0]).astype(np.complex64)) p = dev.probability(wires=[0, 1]) assert dev._state.dtype == np.complex64 - assert np.allclose(p, [1, 1, 0, 0]) + assert np.allclose(p, [0.5, 0.5, 0, 0]) + @pytest.mark.skipif( + not hasattr(np, "complex256"), reason="Numpy only defines complex256 in Linux-like system" + ) def test_probs_dtype_error(self, dev): """Test if probs raise 
error with complex256""" - dev._state = np.array([1, 0]).astype(np.complex256) + dev._state = dev._asarray(np.array([1, 0]).astype(np.complex256)) with pytest.raises(TypeError, match="Unsupported complex Type:"): dev.probability(wires=[0, 1]) @@ -179,6 +183,9 @@ def test_expval_dtype64(self, dev): assert dev._state.dtype == np.complex64 assert np.allclose(e, 0.0) + @pytest.mark.skipif( + not hasattr(np, "complex256"), reason="Numpy only defines complex256 in Linux-like system" + ) def test_expval_dtype_error(self, dev): """Test if expval raise error with complex256""" dev._state = np.array([1, 0]).astype(np.complex256) @@ -296,6 +303,9 @@ def test_var_dtype64(self, dev): assert dev._state.dtype == np.complex64 assert np.allclose(v, 1.0) + @pytest.mark.skipif( + not hasattr(np, "complex256"), reason="Numpy only defines complex256 in Linux-like system" + ) def test_expval_dtype_error(self, dev): """Test if var raise error with complex256""" dev._state = np.array([1, 0]).astype(np.complex256) From 7881d6fcf1c360374ffe5bc43b8c931897153c82 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Wed, 2 Mar 2022 19:34:24 -0500 Subject: [PATCH 22/94] black --- tests/test_measures.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_measures.py b/tests/test_measures.py index 0b8bac88e5..3c3c356ab7 100644 --- a/tests/test_measures.py +++ b/tests/test_measures.py @@ -56,7 +56,9 @@ def dev(self): def test_probs_dtype64(self, dev): """Test if probs changes the state dtype""" - dev._state = dev._asarray(np.array([1/math.sqrt(2), 1/math.sqrt(2), 0, 0]).astype(np.complex64)) + dev._state = dev._asarray( + np.array([1 / math.sqrt(2), 1 / math.sqrt(2), 0, 0]).astype(np.complex64) + ) p = dev.probability(wires=[0, 1]) assert dev._state.dtype == np.complex64 From f6866bbeaf0665941213e83f88c54066c6d08e13 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Wed, 2 Mar 2022 20:22:55 -0500 Subject: [PATCH 23/94] Fix for coverage --- tests/test_adjoint_jacobian.py 
| 14 ++++---- tests/test_measures.py | 2 +- tests/test_vjp.py | 60 ++++++++++++++++------------------ 3 files changed, 37 insertions(+), 39 deletions(-) diff --git a/tests/test_adjoint_jacobian.py b/tests/test_adjoint_jacobian.py index 4872212506..e394b39575 100644 --- a/tests/test_adjoint_jacobian.py +++ b/tests/test_adjoint_jacobian.py @@ -168,16 +168,16 @@ def test_unsupported_hermitian_expectation(self, dev): ) @pytest.mark.skipif(not lq._CPP_BINARY_AVAILABLE, reason="Lightning binary required") def test_unsupported_complex_type(self, dev): - with pytest.raises(TypeError, match="Unsupported .*"): - dev._state = dev._asarray(dev._state, np.complex256) + dev._state = np.zeros(8, np.complex256) # Directly put unaligned numpy array to device - with qml.tape.JacobianTape() as tape: - qml.QubitStateVector(np.array([1.0, -1.0]) / np.sqrt(2), wires=0) - qml.RX(0.3, wires=[0]) - qml.expval(qml.PauliZ(0)) + with qml.tape.JacobianTape() as tape: + qml.QubitStateVector(np.array([1.0, -1.0]) / np.sqrt(2), wires=0) + qml.RX(0.3, wires=[0]) + qml.expval(qml.PauliZ(0)) - tape.trainable_params = {1} + tape.trainable_params = {1} + with pytest.raises(TypeError, match="Unsupported .*"): dev.adjoint_jacobian(tape) @pytest.mark.parametrize("theta", np.linspace(-2 * np.pi, 2 * np.pi, 7)) diff --git a/tests/test_measures.py b/tests/test_measures.py index 3c3c356ab7..10c48c5313 100644 --- a/tests/test_measures.py +++ b/tests/test_measures.py @@ -69,7 +69,7 @@ def test_probs_dtype64(self, dev): ) def test_probs_dtype_error(self, dev): """Test if probs raise error with complex256""" - dev._state = dev._asarray(np.array([1, 0]).astype(np.complex256)) + dev._state = np.array([1, 0, 0, 0]).astype(np.complex256) with pytest.raises(TypeError, match="Unsupported complex Type:"): dev.probability(wires=[0, 1]) diff --git a/tests/test_vjp.py b/tests/test_vjp.py index 2aa97c6faa..eb73f61a4b 100644 --- a/tests/test_vjp.py +++ b/tests/test_vjp.py @@ -39,14 +39,12 @@ def dev(self): not hasattr(np, 
"complex256"), reason="Numpy only defines complex256 in Linux-like system" ) def test_unsupported_complex_type(self, dev): - with pytest.raises(TypeError, match="Unsupported .*"): - dev._state = dev._asarray(dev._state, np.complex256) + dev._state = np.array([1, 0, 0, 0], dtype=np.complex256) - dy = np.array([[1.0, 2.0], [3.0, 4.0]]) - jac = np.array( - [[[1.0, 0.1, 0.2], [0.2, 0.6, 0.1]], [[0.4, -0.7, 1.2], [-0.5, -0.6, 0.7]]] - ) + dy = np.array([[1.0, 2.0], [3.0, 4.0]]) + jac = np.array([[[1.0, 0.1, 0.2], [0.2, 0.6, 0.1]], [[0.4, -0.7, 1.2], [-0.5, -0.6, 0.7]]]) + with pytest.raises(TypeError, match="Unsupported .*"): dev.compute_vjp(dy, jac) @pytest.mark.parametrize("C", [np.complex64, np.complex128]) @@ -122,21 +120,21 @@ def dev(self): not hasattr(np, "complex256"), reason="Numpy only defines complex256 in Linux-like system" ) def test_unsupported_complex_type(self, dev): - with pytest.raises(TypeError, match="Unsupported .*"): - dev._state = dev._asarray(dev._state, np.complex256) + dev._state = np.array([1, 0, 0, 0], dtype=np.complex256) - x, y, z = [0.5, 0.3, -0.7] + x, y, z = [0.5, 0.3, -0.7] - with qml.tape.JacobianTape() as tape: - qml.RX(0.4, wires=[0]) - qml.Rot(x, y, z, wires=[0]) - qml.RY(-0.2, wires=[0]) - qml.expval(qml.PauliZ(0)) + with qml.tape.JacobianTape() as tape: + qml.RX(0.4, wires=[0]) + qml.Rot(x, y, z, wires=[0]) + qml.RY(-0.2, wires=[0]) + qml.expval(qml.PauliZ(0)) - tape.trainable_params = {1, 2, 3} + tape.trainable_params = {1, 2, 3} - dy = np.array([1.0]) + dy = np.array([1.0]) + with pytest.raises(TypeError, match="Unsupported .*"): dev.vjp(tape, dy)(tape) @pytest.mark.parametrize("C", [np.complex64, np.complex128]) @@ -470,26 +468,26 @@ def dev(self): not hasattr(np, "complex256"), reason="Numpy only defines complex256 in Linux-like system" ) def test_unsupported_complex_type(self, dev): - with pytest.raises(TypeError, match="Unsupported .*"): - dev._state = dev._asarray(dev._state, np.complex256) + dev._state = np.array([1, 0, 0, 
0], dtype=np.complex256) - with qml.tape.QuantumTape() as tape1: - qml.RX(0.4, wires=0) - qml.CNOT(wires=[0, 1]) - qml.expval(qml.PauliZ(0)) + with qml.tape.QuantumTape() as tape1: + qml.RX(0.4, wires=0) + qml.CNOT(wires=[0, 1]) + qml.expval(qml.PauliZ(0)) - with qml.tape.JacobianTape() as tape2: - qml.RX(0.4, wires=0) - qml.RX(0.6, wires=0) - qml.CNOT(wires=[0, 1]) - qml.expval(qml.PauliZ(0)) + with qml.tape.JacobianTape() as tape2: + qml.RX(0.4, wires=0) + qml.RX(0.6, wires=0) + qml.CNOT(wires=[0, 1]) + qml.expval(qml.PauliZ(0)) - tape1.trainable_params = {0} - tape2.trainable_params = {0, 1} + tape1.trainable_params = {0} + tape2.trainable_params = {0, 1} - tapes = [tape1, tape2] - dys = [np.array([1.0]), np.array([1.0])] + tapes = [tape1, tape2] + dys = [np.array([1.0]), np.array([1.0])] + with pytest.raises(TypeError, match="Unsupported .*"): dev.batch_vjp(tapes, dys) @pytest.mark.parametrize("C", [np.complex64, np.complex128]) From 31d658569245c0058802a4fd2207bb3d1d4369f9 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Wed, 2 Mar 2022 20:59:42 -0500 Subject: [PATCH 24/94] Update for -Wpedantic --- CMakeLists.txt | 1 - pennylane_lightning/src/simulator/Measures.hpp | 4 ++-- pennylane_lightning/src/tests/Test_Measures.cpp | 2 -- pennylane_lightning/src/tests/Test_OpToMemberFuncPtr.cpp | 4 ++-- pennylane_lightning/src/util/ConstantUtil.hpp | 4 ++-- 5 files changed, 6 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5e143fd62d..58b4faab21 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -56,7 +56,6 @@ FetchContent_MakeAvailable(pybind11) # All CMakeLists.txt in subdirectories use pennylane_lightning_compile_options and pennylane_lightning_external_libs add_subdirectory(pennylane_lightning/src) -set(CMAKE_CXX_VISIBILITY_PRESET hidden) ##################################################### # Maintain for dependent external package development diff --git a/pennylane_lightning/src/simulator/Measures.hpp 
b/pennylane_lightning/src/simulator/Measures.hpp index d03031ee36..c158f7ed37 100644 --- a/pennylane_lightning/src/simulator/Measures.hpp +++ b/pennylane_lightning/src/simulator/Measures.hpp @@ -178,7 +178,7 @@ class Measures { } return expected_value_list; - }; + } /** * @brief Variance of an observable. @@ -255,6 +255,6 @@ class Measures { } return expected_value_list; - }; + } }; // class Measures } // namespace Pennylane diff --git a/pennylane_lightning/src/tests/Test_Measures.cpp b/pennylane_lightning/src/tests/Test_Measures.cpp index 6f18a458df..78ecddfd90 100644 --- a/pennylane_lightning/src/tests/Test_Measures.cpp +++ b/pennylane_lightning/src/tests/Test_Measures.cpp @@ -10,12 +10,10 @@ using namespace Pennylane; -namespace { using std::complex; using std::size_t; using std::string; using std::vector; -}; // namespace StateVectorManagedCPU Initializing_StateVector() { // Defining a StateVector in a non-trivial configuration: diff --git a/pennylane_lightning/src/tests/Test_OpToMemberFuncPtr.cpp b/pennylane_lightning/src/tests/Test_OpToMemberFuncPtr.cpp index 99f5494128..816af428cb 100644 --- a/pennylane_lightning/src/tests/Test_OpToMemberFuncPtr.cpp +++ b/pennylane_lightning/src/tests/Test_OpToMemberFuncPtr.cpp @@ -17,7 +17,7 @@ template constexpr auto allGateOps() { return Util::tuple_to_array(allGateOpsHelper( std::make_integer_sequence(EnumClass::END)>{})); -}; +} template constexpr bool testAllGatesImplementedIter() { @@ -174,7 +174,7 @@ constexpr auto opFuncPtrPairsIter() { } else { return std::tuple{}; } -}; +} /** * @brief Pairs of all implemented gate operations and the corresponding diff --git a/pennylane_lightning/src/util/ConstantUtil.hpp b/pennylane_lightning/src/util/ConstantUtil.hpp index aec36602e1..d3995e7642 100644 --- a/pennylane_lightning/src/util/ConstantUtil.hpp +++ b/pennylane_lightning/src/util/ConstantUtil.hpp @@ -42,7 +42,7 @@ constexpr auto lookup(const std::array, size> &arr, } } throw std::range_error("The given key does not 
exist."); -}; +} /** * @brief Check an array has an element. @@ -61,7 +61,7 @@ constexpr auto array_has_elt(const std::array &arr, const U &elt) } } return false; -}; +} /** * @brief Extract first elements from the array of pairs. From 857a8313540dc45442f155711dec670034a2dd55 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Wed, 2 Mar 2022 23:52:14 -0500 Subject: [PATCH 25/94] Some fix for tidy --- pennylane_lightning/src/.clang-tidy | 2 +- .../DefaultKernelsForStateVector.hpp | 32 +++++++++++++------ .../src/simulator/DynamicDispatcher.hpp | 15 +++++---- .../src/simulator/StateVectorCPU.hpp | 12 ++++--- .../Test_GateImplementations_Generator.cpp | 2 +- pennylane_lightning/src/util/Memory.hpp | 2 ++ 6 files changed, 41 insertions(+), 24 deletions(-) diff --git a/pennylane_lightning/src/.clang-tidy b/pennylane_lightning/src/.clang-tidy index e1fce11707..50b924d24b 100644 --- a/pennylane_lightning/src/.clang-tidy +++ b/pennylane_lightning/src/.clang-tidy @@ -1,5 +1,5 @@ --- -Checks: 'clang-diagnostic-*,clang-analyzer-*,-*,-llvmlibc-*,modernize-*,-modernize-use-trailing-return-type,clang-analyzer-cplusplus*,openmp-*,performance-*,portability-*,readability-*,hicpp-*,-hicpp-avoid-c-arrays,-hicpp-no-array-decay,bugprone-suspicious-*,llvm-namespace-comment,cppcoreguidelines-avoid-non-const-global-variables,cppcoreguidelines-slicing,cppcoreguidelines-special-member-functions' +Checks: '-*,clang-diagnostic-*,clang-analyzer-*,modernize-*,-modernize-use-trailing-return-type,clang-analyzer-cplusplus*,openmp-*,performance-*,portability-*,readability-*,hicpp-*,-hicpp-avoid-c-arrays,-hicpp-no-array-decay,bugprone-suspicious-*,llvm-namespace-comment,cppcoreguidelines-avoid-non-const-global-variables,cppcoreguidelines-slicing,cppcoreguidelines-special-member-functions' WarningsAsErrors: '*' HeaderFilterRegex: '.*' AnalyzeTemporaryDtors: false diff --git a/pennylane_lightning/src/simulator/DefaultKernelsForStateVector.hpp 
b/pennylane_lightning/src/simulator/DefaultKernelsForStateVector.hpp index f3a33b91bf..abedc469f0 100644 --- a/pennylane_lightning/src/simulator/DefaultKernelsForStateVector.hpp +++ b/pennylane_lightning/src/simulator/DefaultKernelsForStateVector.hpp @@ -509,8 +509,8 @@ class DefaultKernelsForStateVector { * @param threading Threading context * @param memory_model Memory model of the underlying data */ - auto getGateKernelMap(size_t num_qubits, Threading threading, - CPUMemoryModel memory_model) const + [[nodiscard]] auto getGateKernelMap(size_t num_qubits, Threading threading, + CPUMemoryModel memory_model) const -> std::unordered_map { uint32_t dispatch_key = toDispatchKey(threading, memory_model); @@ -532,8 +532,9 @@ class DefaultKernelsForStateVector { * @param threading Threading context * @param memory_model Memory model of the underlying data */ - auto getGeneratorKernelMap(size_t num_qubits, Threading threading, - CPUMemoryModel memory_model) const + [[nodiscard]] auto getGeneratorKernelMap(size_t num_qubits, + Threading threading, + CPUMemoryModel memory_model) const -> std::unordered_map { uint32_t dispatch_key = toDispatchKey(threading, memory_model); @@ -556,8 +557,9 @@ class DefaultKernelsForStateVector { * @param threading Threading context * @param memory_model Memory model of the underlying data */ - auto getMatrixKernelMap(size_t num_qubits, Threading threading, - CPUMemoryModel memory_model) const + [[nodiscard]] auto getMatrixKernelMap(size_t num_qubits, + Threading threading, + CPUMemoryModel memory_model) const -> std::unordered_map { uint32_t dispatch_key = toDispatchKey(threading, memory_model); @@ -578,16 +580,26 @@ class DefaultKernelsForStateVector { CPUMemoryModel memory_model, uint32_t priority) { uint32_t dispatch_key = toDispatchKey(threading, memory_model); - gate_kernel_map_[std::make_pair(gate_op, dispatch_key)].clearPriority( - priority); + const auto key = std::make_pair(gate_op, dispatch_key); + + const auto iter = 
generator_kernel_map_.find(key); + if (iter == gate_kernel_map_.end()) { + return; + } + iter->clearPriority(priority); } void removeKernelForMatrix(Gates::MatrixOperation mat_op, Threading threading, CPUMemoryModel memory_model, uint32_t priority) { uint32_t dispatch_key = toDispatchKey(threading, memory_model); - matrix_kernel_map_[std::make_pair(mat_op, dispatch_key)].clearPriority( - priority); + const auto key = std::make_pair(mat_op, dispatch_key); + + const auto iter = matrix_kernel_map_.find(key); + if (iter == matrix_kernel_map_.end()) { + return; + } + iter->clearPriority(priority); } }; } // namespace Pennylane diff --git a/pennylane_lightning/src/simulator/DynamicDispatcher.hpp b/pennylane_lightning/src/simulator/DynamicDispatcher.hpp index 71c92bf772..00d3e7c98c 100644 --- a/pennylane_lightning/src/simulator/DynamicDispatcher.hpp +++ b/pennylane_lightning/src/simulator/DynamicDispatcher.hpp @@ -85,20 +85,20 @@ template class DynamicDispatcher { const std::vector &, bool)>; private: - std::unordered_map str_to_gates_; - std::unordered_map str_to_gntrs_; + std::unordered_map str_to_gates_{}; + std::unordered_map str_to_gntrs_{}; std::unordered_map, GateFunc, Util::PairHash> - gates_; + gates_{}; std::unordered_map, GeneratorFunc, Util::PairHash> - generators_; + generators_{}; std::unordered_map, MatrixFunc, Util::PairHash> - matrices_; + matrices_{}; constexpr static auto removeGeneratorPrefix(std::string_view op_name) -> std::string_view { @@ -128,11 +128,12 @@ template class DynamicDispatcher { return singleton; } - auto strToGateOp(const std::string &gate_name) const + [[nodiscard]] auto strToGateOp(const std::string &gate_name) const -> Gates::GateOperation { return str_to_gates_.at(gate_name); } - auto strToGeneratorOp(const std::string &gntr_name) const + + [[nodiscard]] auto strToGeneratorOp(const std::string &gntr_name) const -> Gates::GeneratorOperation { return str_to_gntrs_.at(gntr_name); } diff --git 
a/pennylane_lightning/src/simulator/StateVectorCPU.hpp b/pennylane_lightning/src/simulator/StateVectorCPU.hpp index bb08bf30e1..392957b115 100644 --- a/pennylane_lightning/src/simulator/StateVectorCPU.hpp +++ b/pennylane_lightning/src/simulator/StateVectorCPU.hpp @@ -40,11 +40,11 @@ class StateVectorCPU : public StateVectorBase { using BaseType = StateVectorBase; std::unordered_map - kernel_for_gates_; + kernel_for_gates_{}; std::unordered_map - kernel_for_generators_; + kernel_for_generators_{}; std::unordered_map - kernel_for_matrices_; + kernel_for_matrices_{}; void setKernels(size_t num_qubits, Threading threading, CPUMemoryModel memory_model) { @@ -83,7 +83,9 @@ class StateVectorCPU : public StateVectorBase { return kernel_for_matrices_.at(mat_op); } - inline CPUMemoryModel memoryModel() const { return memory_model_; } - inline Threading threading() const { return threading_; } + [[nodiscard]] inline CPUMemoryModel memoryModel() const { + return memory_model_; + } + [[nodiscard]] inline Threading threading() const { return threading_; } }; } // namespace Pennylane diff --git a/pennylane_lightning/src/tests/Test_GateImplementations_Generator.cpp b/pennylane_lightning/src/tests/Test_GateImplementations_Generator.cpp index 0dfc1eceb8..ecd52275e2 100644 --- a/pennylane_lightning/src/tests/Test_GateImplementations_Generator.cpp +++ b/pennylane_lightning/src/tests/Test_GateImplementations_Generator.cpp @@ -85,7 +85,7 @@ void testGeneratorForGate(RandomEngine &re, size_t num_qubits) { using ComplexPrecisionT = std::complex; constexpr auto I = Util::IMAG(); - constexpr ParamT eps = static_cast(1e-4); // For finite difference + constexpr auto eps = static_cast(1e-4); // For finite difference constexpr auto gate_op = static_lookup(generator_gate_pairs); constexpr auto gate_name = static_lookup(Constant::gate_names); diff --git a/pennylane_lightning/src/util/Memory.hpp b/pennylane_lightning/src/util/Memory.hpp index ea2c20ec9c..e78923b64d 100644 --- 
a/pennylane_lightning/src/util/Memory.hpp +++ b/pennylane_lightning/src/util/Memory.hpp @@ -76,6 +76,7 @@ template struct AlignedAllocator { if (alignment_ > alignof(std::max_align_t)) { p = alignedAlloc(alignment_, sizeof(T) * size); } else { + // NOLINTNEXTLINE(hicpp-no-malloc) p = malloc(sizeof(T) * size); } if (p == nullptr) { @@ -88,6 +89,7 @@ template struct AlignedAllocator { if (alignment_ > alignof(std::max_align_t)) { alignedFree(p); } else { + // NOLINTNEXTLINE(hicpp-no-malloc) free(p); } } From 838b691cc901ab67f11aaed47fa5f24c0f441cb3 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Thu, 3 Mar 2022 00:01:48 -0500 Subject: [PATCH 26/94] Fix --- .../DefaultKernelsForStateVector.hpp | 22 ++++++++++++++----- pennylane_lightning/src/tests/.clang-tidy | 2 +- .../Test_DefaultKernelsForStateVector.cpp | 6 ++--- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/pennylane_lightning/src/simulator/DefaultKernelsForStateVector.hpp b/pennylane_lightning/src/simulator/DefaultKernelsForStateVector.hpp index abedc469f0..020b0288f8 100644 --- a/pennylane_lightning/src/simulator/DefaultKernelsForStateVector.hpp +++ b/pennylane_lightning/src/simulator/DefaultKernelsForStateVector.hpp @@ -575,18 +575,30 @@ class DefaultKernelsForStateVector { return kernel_for_matrices; } - void removeKernelForGenerator(Gates::GateOperation gate_op, + void removeKernelForGate(Gates::GateOperation gate_op, Threading threading, + CPUMemoryModel memory_model, uint32_t priority) { + uint32_t dispatch_key = toDispatchKey(threading, memory_model); + const auto key = std::make_pair(gate_op, dispatch_key); + + const auto iter = gate_kernel_map_.find(key); + if (iter == gate_kernel_map_.end()) { + return; + } + (iter->second).clearPriority(priority); + } + + void removeKernelForGenerator(Gates::GeneratorOperation gntr_op, Threading threading, CPUMemoryModel memory_model, uint32_t priority) { uint32_t dispatch_key = toDispatchKey(threading, memory_model); - const auto key = 
std::make_pair(gate_op, dispatch_key); + const auto key = std::make_pair(gntr_op, dispatch_key); const auto iter = generator_kernel_map_.find(key); - if (iter == gate_kernel_map_.end()) { + if (iter == generator_kernel_map_.end()) { return; } - iter->clearPriority(priority); + (iter->second).clearPriority(priority); } void removeKernelForMatrix(Gates::MatrixOperation mat_op, @@ -599,7 +611,7 @@ class DefaultKernelsForStateVector { if (iter == matrix_kernel_map_.end()) { return; } - iter->clearPriority(priority); + (iter->second).clearPriority(priority); } }; } // namespace Pennylane diff --git a/pennylane_lightning/src/tests/.clang-tidy b/pennylane_lightning/src/tests/.clang-tidy index 3b5744a4b0..75afabace1 100644 --- a/pennylane_lightning/src/tests/.clang-tidy +++ b/pennylane_lightning/src/tests/.clang-tidy @@ -1,5 +1,5 @@ --- -Checks: 'clang-diagnostic-*,clang-analyzer-*,-*,-llvmlibc-*,modernize-*,-modernize-use-trailing-return-type,clang-analyzer-cplusplus*,openmp-*,performance-*,portability-*,readability-*,-modernize-avoid-c-arrays,-readability-magic-numbers,hicpp-*,-hicpp-no-array-decay,-hicpp-avoid-c-arrays,bugprone-suspicious-*,llvm-namespace-comment,cppcoreguidelines-slicing,cppcoreguidelines-special-member-functions' +Checks: '-*,clang-diagnostic-*,clang-analyzer-*,-llvmlibc-*,modernize-*,-modernize-use-trailing-return-type,clang-analyzer-cplusplus*,openmp-*,performance-*,portability-*,readability-*,-modernize-avoid-c-arrays,-readability-magic-numbers,hicpp-*,-hicpp-no-array-decay,-hicpp-avoid-c-arrays,bugprone-suspicious-*,llvm-namespace-comment,cppcoreguidelines-slicing,cppcoreguidelines-special-member-functions' WarningsAsErrors: '*' HeaderFilterRegex: '.*' AnalyzeTemporaryDtors: false diff --git a/pennylane_lightning/src/tests/Test_DefaultKernelsForStateVector.cpp b/pennylane_lightning/src/tests/Test_DefaultKernelsForStateVector.cpp index 663c76e51c..32b9d0a8f4 100644 --- a/pennylane_lightning/src/tests/Test_DefaultKernelsForStateVector.cpp +++ 
b/pennylane_lightning/src/tests/Test_DefaultKernelsForStateVector.cpp @@ -92,9 +92,9 @@ TEST_CASE("Test priority works", "[Test_DefaultKernelsForStateVector]") { CPUMemoryModel::Unaligned)[GateOperation::PauliX] == KernelType::PI); - instance.removeKernelForGenerator(GateOperation::PauliX, - Threading::SingleThread, - CPUMemoryModel::Unaligned, 100); + instance.removeKernelForGate(GateOperation::PauliX, + Threading::SingleThread, + CPUMemoryModel::Unaligned, 100); REQUIRE(instance.getGateKernelMap( 24, Threading::SingleThread, CPUMemoryModel::Unaligned)[GateOperation::PauliX] == From c09625840a4fc21602ae9566b61afe51bdfeda67 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Thu, 3 Mar 2022 00:26:05 -0500 Subject: [PATCH 27/94] Add omp for clang --- .github/workflows/format.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml index 319471b3ca..a4f04e6334 100644 --- a/.github/workflows/format.yml +++ b/.github/workflows/format.yml @@ -45,7 +45,7 @@ jobs: python-version: 3.8 - name: Install dependencies - run: sudo apt update && sudo apt -y install clang-tidy-12 cmake g++ + run: sudo apt update && sudo apt -y install clang-tidy-12 cmake g++ libomp-dev env: DEBIAN_FRONTEND: noninteractive From 73a812b96bed9ac882223bbcf318fc012d745ef3 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Thu, 3 Mar 2022 00:32:17 -0500 Subject: [PATCH 28/94] Specify version --- .github/workflows/format.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml index a4f04e6334..c7688703ca 100644 --- a/.github/workflows/format.yml +++ b/.github/workflows/format.yml @@ -45,7 +45,7 @@ jobs: python-version: 3.8 - name: Install dependencies - run: sudo apt update && sudo apt -y install clang-tidy-12 cmake g++ libomp-dev + run: sudo apt update && sudo apt -y install clang-tidy-12 cmake g++ libomp-12-dev env: DEBIAN_FRONTEND: noninteractive From 
41adfe064a442b454ec3b4e2d14d975570954896 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Thu, 3 Mar 2022 09:09:53 -0500 Subject: [PATCH 29/94] Fix --- .../src/simulator/DynamicDispatcher.hpp | 10 +++++----- pennylane_lightning/src/simulator/StateVectorCPU.hpp | 6 +++--- tests/test_vjp.py | 12 ++++++++++++ 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/pennylane_lightning/src/simulator/DynamicDispatcher.hpp b/pennylane_lightning/src/simulator/DynamicDispatcher.hpp index 00d3e7c98c..47f73e5e5e 100644 --- a/pennylane_lightning/src/simulator/DynamicDispatcher.hpp +++ b/pennylane_lightning/src/simulator/DynamicDispatcher.hpp @@ -85,20 +85,20 @@ template class DynamicDispatcher { const std::vector &, bool)>; private: - std::unordered_map str_to_gates_{}; - std::unordered_map str_to_gntrs_{}; + std::unordered_map str_to_gates_; + std::unordered_map str_to_gntrs_; std::unordered_map, GateFunc, Util::PairHash> - gates_{}; + gates_; std::unordered_map, GeneratorFunc, Util::PairHash> - generators_{}; + generators_; std::unordered_map, MatrixFunc, Util::PairHash> - matrices_{}; + matrices_; constexpr static auto removeGeneratorPrefix(std::string_view op_name) -> std::string_view { diff --git a/pennylane_lightning/src/simulator/StateVectorCPU.hpp b/pennylane_lightning/src/simulator/StateVectorCPU.hpp index 392957b115..e0f944ad25 100644 --- a/pennylane_lightning/src/simulator/StateVectorCPU.hpp +++ b/pennylane_lightning/src/simulator/StateVectorCPU.hpp @@ -40,11 +40,11 @@ class StateVectorCPU : public StateVectorBase { using BaseType = StateVectorBase; std::unordered_map - kernel_for_gates_{}; + kernel_for_gates_; std::unordered_map - kernel_for_generators_{}; + kernel_for_generators_; std::unordered_map - kernel_for_matrices_{}; + kernel_for_matrices_; void setKernels(size_t num_qubits, Threading threading, CPUMemoryModel memory_model) { diff --git a/tests/test_vjp.py b/tests/test_vjp.py index eb73f61a4b..6bc1bfc884 100644 --- a/tests/test_vjp.py +++ 
b/tests/test_vjp.py @@ -108,6 +108,18 @@ def test_zero_dy(self, dev, C): vjp = dev.compute_vjp(dy, jac) assert np.all(vjp == np.zeros([3])) + @pytest.mark.parametrize("C", [np.complex64, np.complex128]) + def test_non_numpy_dy(self, dev, C): + "Test when dy is torch.tensor" + torch = pytest.importorskip("torch") + dev._state = dev._asarray(dev._state, C) + + dy = torch.zeros(2, 2) + jac = np.array([[[1.0, 0.1, 0.2], [0.2, 0.6, 0.1]], [[0.4, -0.7, 1.2], [-0.5, -0.6, 0.7]]]) + + vjp = dev.compute_vjp(dy, jac) + assert torch.equal(vjp, torch.zeros([3], dtype=torch.double)) + class TestVectorJacobianProduct: """Tests for the `vjp` function""" From 5dd0919a333328865430105bc132054bf0239e92 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Thu, 3 Mar 2022 10:13:28 -0500 Subject: [PATCH 30/94] Trigger From 63b52c7d2c25852f14fc6fc71b6b7b0ba8adb4cb Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Thu, 3 Mar 2022 14:35:20 -0500 Subject: [PATCH 31/94] Fix --- pennylane_lightning/_serialize.py | 1 - .../src/simulator/DefaultKernelsForStateVector.hpp | 3 +-- tests/test_vjp.py | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/pennylane_lightning/_serialize.py b/pennylane_lightning/_serialize.py index db7d78cb8e..7c4f483c2b 100644 --- a/pennylane_lightning/_serialize.py +++ b/pennylane_lightning/_serialize.py @@ -34,7 +34,6 @@ ObsStructC64, StateVectorC128, ObsStructC128, - DEFAULT_KERNEL_FOR_OPS, ) except ImportError: pass diff --git a/pennylane_lightning/src/simulator/DefaultKernelsForStateVector.hpp b/pennylane_lightning/src/simulator/DefaultKernelsForStateVector.hpp index 020b0288f8..0dbe545044 100644 --- a/pennylane_lightning/src/simulator/DefaultKernelsForStateVector.hpp +++ b/pennylane_lightning/src/simulator/DefaultKernelsForStateVector.hpp @@ -108,8 +108,7 @@ class PriorityDispatchSet { class DefaultKernelsForStateVector { private: - const static inline std::unordered_map> + const std::unordered_map> allowed_kernels{ {CPUMemoryModel::Unaligned, 
{Gates::KernelType::LM, Gates::KernelType::PI}}, diff --git a/tests/test_vjp.py b/tests/test_vjp.py index 6bc1bfc884..d944f7bbde 100644 --- a/tests/test_vjp.py +++ b/tests/test_vjp.py @@ -110,7 +110,7 @@ def test_zero_dy(self, dev, C): @pytest.mark.parametrize("C", [np.complex64, np.complex128]) def test_non_numpy_dy(self, dev, C): - "Test when dy is torch.tensor" + "Test compute_vjp works when dy is torch.tensor" torch = pytest.importorskip("torch") dev._state = dev._asarray(dev._state, C) From 1102ad07abb544a497a44591416ef5f2d71e941e Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Thu, 3 Mar 2022 17:44:01 -0500 Subject: [PATCH 32/94] KernelMap refactor --- .../src/simulator/CMakeLists.txt | 2 +- .../DefaultKernelsForStateVector.hpp | 616 ------------------ .../src/simulator/DynamicDispatcher.hpp | 6 +- .../src/simulator/KernelMap.cpp | 188 ++++++ .../src/simulator/KernelMap.hpp | 282 ++++++++ .../src/simulator/StateVectorCPU.hpp | 19 +- pennylane_lightning/src/tests/CMakeLists.txt | 2 +- ...sForStateVector.cpp => Test_KernelMap.cpp} | 77 ++- .../src/util/IntegerInterval.hpp | 2 +- 9 files changed, 531 insertions(+), 663 deletions(-) delete mode 100644 pennylane_lightning/src/simulator/DefaultKernelsForStateVector.hpp create mode 100644 pennylane_lightning/src/simulator/KernelMap.cpp create mode 100644 pennylane_lightning/src/simulator/KernelMap.hpp rename pennylane_lightning/src/tests/{Test_DefaultKernelsForStateVector.cpp => Test_KernelMap.cpp} (50%) diff --git a/pennylane_lightning/src/simulator/CMakeLists.txt b/pennylane_lightning/src/simulator/CMakeLists.txt index ff07211f3a..452d5353e3 100644 --- a/pennylane_lightning/src/simulator/CMakeLists.txt +++ b/pennylane_lightning/src/simulator/CMakeLists.txt @@ -1,7 +1,7 @@ project(lightning_simulator) set(CMAKE_CXX_STANDARD 17) -set(SIMULATOR_FILES DynamicDispatcher.cpp CACHE INTERNAL "" FORCE) +set(SIMULATOR_FILES DynamicDispatcher.cpp KernelMap.cpp CACHE INTERNAL "" FORCE) add_library(lightning_simulator 
STATIC ${SIMULATOR_FILES}) diff --git a/pennylane_lightning/src/simulator/DefaultKernelsForStateVector.hpp b/pennylane_lightning/src/simulator/DefaultKernelsForStateVector.hpp deleted file mode 100644 index 0dbe545044..0000000000 --- a/pennylane_lightning/src/simulator/DefaultKernelsForStateVector.hpp +++ /dev/null @@ -1,616 +0,0 @@ -// Copyright 2022 Xanadu Quantum Technologies Inc. - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/** - * @file - * Set/get Default kernels for statevector - */ -#include "DispatchKeys.hpp" -#include "GateOperation.hpp" -#include "IntegerInterval.hpp" -#include "KernelType.hpp" -#include "Util.hpp" - -#include -#include -#include -#include - -namespace Pennylane { - -///@cond DEV -struct DispatchElement { - uint32_t priority; - Util::IntegerInterval interval; - Gates::KernelType kernel; -}; - -inline bool lower_priority(const DispatchElement &lhs, - const DispatchElement &rhs) { - return lhs.priority < rhs.priority; -} - -inline bool higher_priority(const DispatchElement &lhs, - const DispatchElement &rhs) { - return lhs.priority > rhs.priority; -} - -/** - * @brief Maintain dispatch element using a vector decreasingly-ordered by - * priority. 
- */ -class PriorityDispatchSet { - private: - std::vector ordered_vec_; - - public: - [[nodiscard]] bool - conflict(uint32_t test_priority, - const Util::IntegerInterval &test_interval) const { - const auto test_elt = DispatchElement{test_priority, test_interval, - Gates::KernelType::None}; - const auto [b, e] = - std::equal_range(ordered_vec_.begin(), ordered_vec_.end(), test_elt, - higher_priority); - for (auto iter = b; iter != e; ++iter) { - if (!is_disjoint(iter->interval, test_interval)) { - return true; - } - } - return false; - } - - void insert(const DispatchElement &elt) { - const auto iter_to_insert = std::upper_bound( - ordered_vec_.begin(), ordered_vec_.end(), elt, &higher_priority); - ordered_vec_.insert(iter_to_insert, elt); - } - - template void emplace(Ts &&...args) { - const auto elt = DispatchElement{std::forward(args)...}; - const auto iter_to_insert = std::upper_bound( - ordered_vec_.begin(), ordered_vec_.end(), elt, &higher_priority); - ordered_vec_.insert(iter_to_insert, elt); - } - - [[nodiscard]] Gates::KernelType getKernel(size_t num_qubits) const { - for (const auto &elt : ordered_vec_) { - if (elt.interval(num_qubits)) { - return elt.kernel; - } - } - throw std::range_error( - "Cannot find a kernel for the given number of qubits."); - } - - void clearPriority(uint32_t remove_priority) { - const auto begin = std::lower_bound( - ordered_vec_.begin(), ordered_vec_.end(), remove_priority, - [](const auto &elt, uint32_t p) { return elt.priority > p; }); - const auto end = std::upper_bound( - ordered_vec_.begin(), ordered_vec_.end(), remove_priority, - [](uint32_t p, const auto &elt) { return p > elt.priority; }); - ordered_vec_.erase(begin, end); - } -}; - -///@endcond - -class DefaultKernelsForStateVector { - private: - const std::unordered_map> - allowed_kernels{ - {CPUMemoryModel::Unaligned, - {Gates::KernelType::LM, Gates::KernelType::PI}}, - {CPUMemoryModel::Aligned256, - {Gates::KernelType::LM, Gates::KernelType::PI}}, - 
{CPUMemoryModel::Aligned512, - {Gates::KernelType::LM, Gates::KernelType::PI}}, - }; - - std::unordered_map< - std::pair, - PriorityDispatchSet, Util::PairHash> - gate_kernel_map_; - - std::unordered_map< - std::pair, - PriorityDispatchSet, Util::PairHash> - generator_kernel_map_; - - std::unordered_map< - std::pair, - PriorityDispatchSet, Util::PairHash> - matrix_kernel_map_; - - void registerDefaultGates() { - using Gates::GateOperation; - using Util::full_domain; - using Util::in_between_closed; - using Util::larger_than; - using Util::larger_than_equal_to; - using Util::less_than; - using Util::less_than_equal_to; - - auto &instance = *this; - auto all_qubit_numbers = full_domain(); - /* Single-qubit gates */ - instance.assignKernelForGate(GateOperation::PauliX, all_threading, - all_memory_model, all_qubit_numbers, - Gates::KernelType::LM); - instance.assignKernelForGate(GateOperation::PauliY, all_threading, - all_memory_model, all_qubit_numbers, - Gates::KernelType::LM); - instance.assignKernelForGate(GateOperation::PauliZ, all_threading, - all_memory_model, all_qubit_numbers, - Gates::KernelType::LM); - instance.assignKernelForGate(GateOperation::Hadamard, all_threading, - all_memory_model, all_qubit_numbers, - Gates::KernelType::LM); - instance.assignKernelForGate(GateOperation::S, all_threading, - all_memory_model, all_qubit_numbers, - Gates::KernelType::LM); - instance.assignKernelForGate(GateOperation::T, all_threading, - all_memory_model, all_qubit_numbers, - Gates::KernelType::LM); - instance.assignKernelForGate(GateOperation::PhaseShift, all_threading, - all_memory_model, all_qubit_numbers, - Gates::KernelType::LM); - instance.assignKernelForGate(GateOperation::RX, all_threading, - all_memory_model, all_qubit_numbers, - Gates::KernelType::LM); - instance.assignKernelForGate(GateOperation::RY, all_threading, - all_memory_model, all_qubit_numbers, - Gates::KernelType::LM); - instance.assignKernelForGate(GateOperation::RZ, all_threading, - 
all_memory_model, all_qubit_numbers, - Gates::KernelType::LM); - instance.assignKernelForGate(GateOperation::Rot, all_threading, - all_memory_model, all_qubit_numbers, - Gates::KernelType::LM); - /* Two-qubit gates */ - instance.assignKernelForGate(GateOperation::CNOT, all_threading, - all_memory_model, all_qubit_numbers, - Gates::KernelType::LM); - instance.assignKernelForGate(GateOperation::CY, all_threading, - all_memory_model, all_qubit_numbers, - Gates::KernelType::LM); - instance.assignKernelForGate(GateOperation::CZ, all_threading, - all_memory_model, all_qubit_numbers, - Gates::KernelType::LM); - instance.assignKernelForGate(GateOperation::ControlledPhaseShift, - all_threading, all_memory_model, - all_qubit_numbers, Gates::KernelType::LM); - instance.assignKernelForGate(GateOperation::SWAP, all_threading, - all_memory_model, all_qubit_numbers, - Gates::KernelType::LM); - - instance.assignKernelForGate( - GateOperation::IsingXX, all_threading, - // NOLINTNEXTLINE(readability-magic-numbers) - all_memory_model, less_than(12), Gates::KernelType::LM); - instance.assignKernelForGate( - GateOperation::IsingXX, all_threading, all_memory_model, - // NOLINTNEXTLINE(readability-magic-numbers) - in_between_closed(12, 20), Gates::KernelType::PI); - instance.assignKernelForGate( - GateOperation::IsingXX, all_threading, - // NOLINTNEXTLINE(readability-magic-numbers) - all_memory_model, larger_than(20), Gates::KernelType::LM); - - instance.assignKernelForGate(GateOperation::IsingYY, all_threading, - all_memory_model, all_qubit_numbers, - Gates::KernelType::LM); - instance.assignKernelForGate(GateOperation::IsingZZ, all_threading, - all_memory_model, all_qubit_numbers, - Gates::KernelType::LM); - instance.assignKernelForGate(GateOperation::CRX, all_threading, - all_memory_model, all_qubit_numbers, - Gates::KernelType::LM); - instance.assignKernelForGate(GateOperation::CRY, all_threading, - all_memory_model, all_qubit_numbers, - Gates::KernelType::LM); - 
instance.assignKernelForGate(GateOperation::CRZ, all_threading, - all_memory_model, all_qubit_numbers, - Gates::KernelType::LM); - instance.assignKernelForGate(GateOperation::CRot, all_threading, - all_memory_model, all_qubit_numbers, - Gates::KernelType::LM); - instance.assignKernelForGate(GateOperation::Toffoli, all_threading, - all_memory_model, all_qubit_numbers, - Gates::KernelType::PI); - instance.assignKernelForGate(GateOperation::CSWAP, all_threading, - all_memory_model, all_qubit_numbers, - Gates::KernelType::PI); - instance.assignKernelForGate(GateOperation::MultiRZ, all_threading, - all_memory_model, all_qubit_numbers, - Gates::KernelType::LM); - } - - void registerDefaultGenerators() { - using Gates::GateOperation; - using Gates::GeneratorOperation; - using Gates::KernelType; - using Util::full_domain; - using Util::in_between_closed; - using Util::larger_than; - using Util::larger_than_equal_to; - using Util::less_than; - using Util::less_than_equal_to; - - auto &instance = *this; - auto all_qubit_numbers = full_domain(); - - instance.assignKernelForGenerator(GeneratorOperation::PhaseShift, - all_threading, all_memory_model, - all_qubit_numbers, KernelType::LM); - instance.assignKernelForGenerator(GeneratorOperation::RX, all_threading, - all_memory_model, all_qubit_numbers, - KernelType::LM); - instance.assignKernelForGenerator(GeneratorOperation::RY, all_threading, - all_memory_model, all_qubit_numbers, - KernelType::LM); - instance.assignKernelForGenerator(GeneratorOperation::RZ, all_threading, - all_memory_model, all_qubit_numbers, - KernelType::LM); - instance.assignKernelForGenerator(GeneratorOperation::IsingXX, - all_threading, all_memory_model, - all_qubit_numbers, KernelType::LM); - instance.assignKernelForGenerator(GeneratorOperation::IsingYY, - all_threading, all_memory_model, - all_qubit_numbers, KernelType::LM); - instance.assignKernelForGenerator(GeneratorOperation::IsingZZ, - all_threading, all_memory_model, - all_qubit_numbers, 
KernelType::LM); - instance.assignKernelForGenerator(GeneratorOperation::CRX, - all_threading, all_memory_model, - all_qubit_numbers, KernelType::LM); - instance.assignKernelForGenerator(GeneratorOperation::CRY, - all_threading, all_memory_model, - all_qubit_numbers, KernelType::LM); - instance.assignKernelForGenerator(GeneratorOperation::CRZ, - all_threading, all_memory_model, - all_qubit_numbers, KernelType::LM); - instance.assignKernelForGenerator( - GeneratorOperation::ControlledPhaseShift, all_threading, - all_memory_model, all_qubit_numbers, KernelType::LM); - instance.assignKernelForGenerator(GeneratorOperation::MultiRZ, - all_threading, all_memory_model, - all_qubit_numbers, KernelType::LM); - } - - void registerDefaultMatrices() { - using Gates::GateOperation; - using Gates::KernelType; - using Gates::MatrixOperation; - using Util::full_domain; - using Util::in_between_closed; - using Util::larger_than; - using Util::larger_than_equal_to; - using Util::less_than; - using Util::less_than_equal_to; - - auto &instance = *this; - auto all_qubit_numbers = full_domain(); - - instance.assignKernelForMatrix(MatrixOperation::SingleQubitOp, - all_threading, all_memory_model, - all_qubit_numbers, KernelType::LM); - instance.assignKernelForMatrix(MatrixOperation::TwoQubitOp, - all_threading, all_memory_model, - all_qubit_numbers, KernelType::LM); - instance.assignKernelForMatrix(MatrixOperation::MultiQubitOp, - all_threading, all_memory_model, - all_qubit_numbers, KernelType::PI); - } - - DefaultKernelsForStateVector() { - registerDefaultGates(); - registerDefaultGenerators(); - registerDefaultMatrices(); - } - - public: - struct AllThreading {}; - - struct AllMemoryModel {}; - - constexpr static AllThreading all_threading{}; - constexpr static AllMemoryModel all_memory_model{}; - - static auto getInstance() -> DefaultKernelsForStateVector & { - static DefaultKernelsForStateVector instance; - - return instance; - } - - void assignKernelForGate(Gates::GateOperation 
gate_op, Threading threading, - CPUMemoryModel memory_model, uint32_t priority, - const Util::IntegerInterval &interval, - Gates::KernelType kernel) { - if (std::find(allowed_kernels.at(memory_model).cbegin(), - allowed_kernels.at(memory_model).cend(), - kernel) == allowed_kernels.at(memory_model).cend()) { - throw std::invalid_argument("The given kernel is now allowed for " - "the given memory model."); - } - const auto dispatch_key = toDispatchKey(threading, memory_model); - auto &set = gate_kernel_map_[std::make_pair(gate_op, dispatch_key)]; - - if (set.conflict(priority, interval)) { - throw std::invalid_argument("The given interval conflicts with " - "existing intervals."); - } - set.emplace(priority, interval, kernel); - } - - void assignKernelForGate(Gates::GateOperation gate_op, - [[maybe_unused]] AllThreading dummy, - CPUMemoryModel memory_model, - const Util::IntegerInterval &interval, - Gates::KernelType kernel) { - /* Priority for all threading is 1 */ - Util::for_each_enum([=](Threading threading) { - assignKernelForGate(gate_op, threading, memory_model, 1, interval, - kernel); - }); - } - - void assignKernelForGate(Gates::GateOperation gate_op, Threading threading, - [[maybe_unused]] AllMemoryModel dummy, - const Util::IntegerInterval &interval, - Gates::KernelType kernel) { - /* Priority for all memory model is 2 */ - Util::for_each_enum([=](CPUMemoryModel memory_model) { - assignKernelForGate(gate_op, threading, memory_model, 2, interval, - kernel); - }); - } - - void assignKernelForGate(Gates::GateOperation gate_op, - [[maybe_unused]] AllThreading dummy1, - [[maybe_unused]] AllMemoryModel dummy2, - const Util::IntegerInterval &interval, - Gates::KernelType kernel) { - /* Priority is 0 */ - Util::for_each_enum( - [=](Threading threading, CPUMemoryModel memory_model) { - assignKernelForGate(gate_op, threading, memory_model, 0, - interval, kernel); - }); - } - - void assignKernelForGenerator(Gates::GeneratorOperation gntr_op, - Threading threading, - 
CPUMemoryModel memory_model, - uint32_t priority, - const Util::IntegerInterval &interval, - Gates::KernelType kernel) { - if (std::find(allowed_kernels.at(memory_model).cbegin(), - allowed_kernels.at(memory_model).cend(), - kernel) == allowed_kernels.at(memory_model).cend()) { - throw std::invalid_argument("The given kernel is now allowed for " - "the given memory model."); - } - const auto dispatch_key = toDispatchKey(threading, memory_model); - auto &set = - generator_kernel_map_[std::make_pair(gntr_op, dispatch_key)]; - - if (set.conflict(priority, interval)) { - throw std::invalid_argument("The given interval conflicts with " - "existing intervals."); - } - set.emplace(priority, interval, kernel); - } - - void assignKernelForGenerator(Gates::GeneratorOperation gntr_op, - [[maybe_unused]] AllThreading dummy, - CPUMemoryModel memory_model, - const Util::IntegerInterval &interval, - Gates::KernelType kernel) { - Util::for_each_enum([=](Threading threading) { - assignKernelForGenerator(gntr_op, threading, memory_model, 1, - interval, kernel); - }); - } - - void assignKernelForGenerator(Gates::GeneratorOperation gntr_op, - Threading threading, - [[maybe_unused]] AllMemoryModel dummy, - const Util::IntegerInterval &interval, - Gates::KernelType kernel) { - Util::for_each_enum([=](CPUMemoryModel memory_model) { - assignKernelForGenerator(gntr_op, threading, memory_model, 2, - interval, kernel); - }); - } - - void assignKernelForGenerator(Gates::GeneratorOperation gntr_op, - [[maybe_unused]] AllThreading dummy1, - [[maybe_unused]] AllMemoryModel dummy2, - const Util::IntegerInterval &interval, - Gates::KernelType kernel) { - Util::for_each_enum( - [=](Threading threading, CPUMemoryModel memory_model) { - assignKernelForGenerator(gntr_op, threading, memory_model, 0, - interval, kernel); - }); - } - - void assignKernelForMatrix(Gates::MatrixOperation mat_op, - Threading threading, CPUMemoryModel memory_model, - uint32_t priority, - const Util::IntegerInterval &interval, 
- Gates::KernelType kernel) { - if (std::find(allowed_kernels.at(memory_model).cbegin(), - allowed_kernels.at(memory_model).cend(), - kernel) == allowed_kernels.at(memory_model).cend()) { - throw std::invalid_argument("The given kernel is now allowed for " - "the given memory model."); - } - const auto dispatch_key = toDispatchKey(threading, memory_model); - auto &set = matrix_kernel_map_[std::make_pair(mat_op, dispatch_key)]; - - if (set.conflict(priority, interval)) { - throw std::invalid_argument("The given interval conflicts with " - "existing intervals."); - } - set.emplace(priority, interval, kernel); - } - - void assignKernelForMatrix(Gates::MatrixOperation mat_op, - [[maybe_unused]] AllThreading dummy, - CPUMemoryModel memory_model, - const Util::IntegerInterval &interval, - Gates::KernelType kernel) { - Util::for_each_enum([=](Threading threading) { - assignKernelForMatrix(mat_op, threading, memory_model, 1, interval, - kernel); - }); - } - - void assignKernelForMatrix(Gates::MatrixOperation mat_op, - Threading threading, - [[maybe_unused]] AllMemoryModel dummy, - const Util::IntegerInterval &interval, - Gates::KernelType kernel) { - Util::for_each_enum([=](CPUMemoryModel memory_model) { - assignKernelForMatrix(mat_op, threading, memory_model, 2, interval, - kernel); - }); - } - - void assignKernelForMatrix(Gates::MatrixOperation mat_op, - [[maybe_unused]] AllThreading dummy1, - [[maybe_unused]] AllMemoryModel dummy2, - const Util::IntegerInterval &interval, - Gates::KernelType kernel) { - Util::for_each_enum( - [=](Threading threading, CPUMemoryModel memory_model) { - assignKernelForMatrix(mat_op, threading, memory_model, 0, - interval, kernel); - }); - } - - /** - * @brief Create default kernels for all gates - * @param num_qubits Number of qubits - * @param threading Threading context - * @param memory_model Memory model of the underlying data - */ - [[nodiscard]] auto getGateKernelMap(size_t num_qubits, Threading threading, - CPUMemoryModel 
memory_model) const - -> std::unordered_map { - uint32_t dispatch_key = toDispatchKey(threading, memory_model); - - std::unordered_map - kernel_for_gates; - - Util::for_each_enum( - [&](Gates::GateOperation gate_op) { - const auto key = std::make_pair(gate_op, dispatch_key); - const auto &set = gate_kernel_map_.at(key); - kernel_for_gates.emplace(gate_op, set.getKernel(num_qubits)); - }); - return kernel_for_gates; - } - - /** - * @brief Create default kernels for all generators - * @param num_qubits Number of qubits - * @param threading Threading context - * @param memory_model Memory model of the underlying data - */ - [[nodiscard]] auto getGeneratorKernelMap(size_t num_qubits, - Threading threading, - CPUMemoryModel memory_model) const - -> std::unordered_map { - uint32_t dispatch_key = toDispatchKey(threading, memory_model); - - std::unordered_map - kernel_for_generators; - - Util::for_each_enum( - [&](Gates::GeneratorOperation gntr_op) { - const auto key = std::make_pair(gntr_op, dispatch_key); - const auto &set = generator_kernel_map_.at(key); - kernel_for_generators.emplace(gntr_op, - set.getKernel(num_qubits)); - }); - return kernel_for_generators; - } - - /** - * @brief Create default kernels for all matrix operations - * @param num_qubits Number of qubits - * @param threading Threading context - * @param memory_model Memory model of the underlying data - */ - [[nodiscard]] auto getMatrixKernelMap(size_t num_qubits, - Threading threading, - CPUMemoryModel memory_model) const - -> std::unordered_map { - uint32_t dispatch_key = toDispatchKey(threading, memory_model); - - std::unordered_map - kernel_for_matrices; - - Util::for_each_enum( - [&](Gates::MatrixOperation mat_op) { - const auto key = std::make_pair(mat_op, dispatch_key); - const auto &set = matrix_kernel_map_.at(key); - kernel_for_matrices.emplace(mat_op, set.getKernel(num_qubits)); - }); - return kernel_for_matrices; - } - - void removeKernelForGate(Gates::GateOperation gate_op, Threading 
threading, - CPUMemoryModel memory_model, uint32_t priority) { - uint32_t dispatch_key = toDispatchKey(threading, memory_model); - const auto key = std::make_pair(gate_op, dispatch_key); - - const auto iter = gate_kernel_map_.find(key); - if (iter == gate_kernel_map_.end()) { - return; - } - (iter->second).clearPriority(priority); - } - - void removeKernelForGenerator(Gates::GeneratorOperation gntr_op, - Threading threading, - CPUMemoryModel memory_model, - uint32_t priority) { - uint32_t dispatch_key = toDispatchKey(threading, memory_model); - const auto key = std::make_pair(gntr_op, dispatch_key); - - const auto iter = generator_kernel_map_.find(key); - if (iter == generator_kernel_map_.end()) { - return; - } - (iter->second).clearPriority(priority); - } - - void removeKernelForMatrix(Gates::MatrixOperation mat_op, - Threading threading, CPUMemoryModel memory_model, - uint32_t priority) { - uint32_t dispatch_key = toDispatchKey(threading, memory_model); - const auto key = std::make_pair(mat_op, dispatch_key); - - const auto iter = matrix_kernel_map_.find(key); - if (iter == matrix_kernel_map_.end()) { - return; - } - (iter->second).clearPriority(priority); - } -}; -} // namespace Pennylane diff --git a/pennylane_lightning/src/simulator/DynamicDispatcher.hpp b/pennylane_lightning/src/simulator/DynamicDispatcher.hpp index 47f73e5e5e..d4cd500bd7 100644 --- a/pennylane_lightning/src/simulator/DynamicDispatcher.hpp +++ b/pennylane_lightning/src/simulator/DynamicDispatcher.hpp @@ -53,14 +53,14 @@ namespace Pennylane { * @brief These functions are only used to register kernels to the dynamic * dispatcher. 
*/ -template struct registerBeforeMain; +template struct RegisterBeforeMain; -template <> struct registerBeforeMain { +template <> struct RegisterBeforeMain { static inline const int dummy = Internal::registerAllAvailableKernels(); }; -template <> struct registerBeforeMain { +template <> struct RegisterBeforeMain { static inline const int dummy = Internal::registerAllAvailableKernels(); }; diff --git a/pennylane_lightning/src/simulator/KernelMap.cpp b/pennylane_lightning/src/simulator/KernelMap.cpp new file mode 100644 index 0000000000..73f5a75d88 --- /dev/null +++ b/pennylane_lightning/src/simulator/KernelMap.cpp @@ -0,0 +1,188 @@ +// Copyright 2022 Xanadu Quantum Technologies Inc. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include "KernelMap.hpp" + +#include "GateOperation.hpp" +#include "KernelType.hpp" + +using namespace Pennylane; +using namespace Pennylane::KernelMap; + +using Gates::GateOperation; +using Gates::GeneratorOperation; +using Gates::KernelType; +using Gates::MatrixOperation; +using Util::full_domain; +using Util::in_between_closed; +using Util::larger_than; +using Util::larger_than_equal_to; +using Util::less_than; +using Util::less_than_equal_to; + +namespace Pennylane::KernelMap::Internal { + +constexpr static auto all_qubit_numbers = Util::full_domain(); + +int assignDefaultKernelsForGateOp() { + auto &instance = OperationKernelMap::getInstance(); + + instance.assignKernelForOp(GateOperation::PauliX, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + instance.assignKernelForOp(GateOperation::PauliY, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + instance.assignKernelForOp(GateOperation::PauliZ, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + instance.assignKernelForOp(GateOperation::Hadamard, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + instance.assignKernelForOp(GateOperation::S, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + instance.assignKernelForOp(GateOperation::T, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + instance.assignKernelForOp(GateOperation::PhaseShift, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + instance.assignKernelForOp(GateOperation::RX, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + instance.assignKernelForOp(GateOperation::RY, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + instance.assignKernelForOp(GateOperation::RZ, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + 
instance.assignKernelForOp(GateOperation::Rot, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + /* Two-qubit gates */ + instance.assignKernelForOp(GateOperation::CNOT, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + instance.assignKernelForOp(GateOperation::CY, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + instance.assignKernelForOp(GateOperation::CZ, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + instance.assignKernelForOp(GateOperation::ControlledPhaseShift, + all_threading, all_memory_model, + all_qubit_numbers, Gates::KernelType::LM); + instance.assignKernelForOp(GateOperation::SWAP, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + + instance.assignKernelForOp(GateOperation::IsingXX, all_threading, + // NOLINTNEXTLINE(readability-magic-numbers) + all_memory_model, less_than(12), + Gates::KernelType::LM); + instance.assignKernelForOp( + GateOperation::IsingXX, all_threading, all_memory_model, + // NOLINTNEXTLINE(readability-magic-numbers) + in_between_closed(12, 20), Gates::KernelType::PI); + instance.assignKernelForOp(GateOperation::IsingXX, all_threading, + // NOLINTNEXTLINE(readability-magic-numbers) + all_memory_model, larger_than(20), + Gates::KernelType::LM); + + instance.assignKernelForOp(GateOperation::IsingYY, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + instance.assignKernelForOp(GateOperation::IsingZZ, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + instance.assignKernelForOp(GateOperation::CRX, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + instance.assignKernelForOp(GateOperation::CRY, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + instance.assignKernelForOp(GateOperation::CRZ, all_threading, + all_memory_model, all_qubit_numbers, + 
Gates::KernelType::LM); + instance.assignKernelForOp(GateOperation::CRot, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + instance.assignKernelForOp(GateOperation::Toffoli, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::PI); + instance.assignKernelForOp(GateOperation::CSWAP, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::PI); + instance.assignKernelForOp(GateOperation::MultiRZ, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); + return 1; +} + +int assignDefaultKernelsForGeneratorOp() { + auto &instance = OperationKernelMap::getInstance(); + + instance.assignKernelForOp(GeneratorOperation::PhaseShift, all_threading, + all_memory_model, all_qubit_numbers, + KernelType::LM); + instance.assignKernelForOp(GeneratorOperation::RX, all_threading, + all_memory_model, all_qubit_numbers, + KernelType::LM); + instance.assignKernelForOp(GeneratorOperation::RY, all_threading, + all_memory_model, all_qubit_numbers, + KernelType::LM); + instance.assignKernelForOp(GeneratorOperation::RZ, all_threading, + all_memory_model, all_qubit_numbers, + KernelType::LM); + instance.assignKernelForOp(GeneratorOperation::IsingXX, all_threading, + all_memory_model, all_qubit_numbers, + KernelType::LM); + instance.assignKernelForOp(GeneratorOperation::IsingYY, all_threading, + all_memory_model, all_qubit_numbers, + KernelType::LM); + instance.assignKernelForOp(GeneratorOperation::IsingZZ, all_threading, + all_memory_model, all_qubit_numbers, + KernelType::LM); + instance.assignKernelForOp(GeneratorOperation::CRX, all_threading, + all_memory_model, all_qubit_numbers, + KernelType::LM); + instance.assignKernelForOp(GeneratorOperation::CRY, all_threading, + all_memory_model, all_qubit_numbers, + KernelType::LM); + instance.assignKernelForOp(GeneratorOperation::CRZ, all_threading, + all_memory_model, all_qubit_numbers, + KernelType::LM); + 
instance.assignKernelForOp(GeneratorOperation::ControlledPhaseShift, + all_threading, all_memory_model, + all_qubit_numbers, KernelType::LM); + instance.assignKernelForOp(GeneratorOperation::MultiRZ, all_threading, + all_memory_model, all_qubit_numbers, + KernelType::LM); + return 1; +} +int assignDefaultKernelsForMatrixOp() { + auto &instance = OperationKernelMap::getInstance(); + + instance.assignKernelForOp(MatrixOperation::SingleQubitOp, all_threading, + all_memory_model, all_qubit_numbers, + KernelType::LM); + instance.assignKernelForOp(MatrixOperation::TwoQubitOp, all_threading, + all_memory_model, all_qubit_numbers, + KernelType::LM); + instance.assignKernelForOp(MatrixOperation::MultiQubitOp, all_threading, + all_memory_model, all_qubit_numbers, + KernelType::PI); + return 1; +} +} // namespace Pennylane::KernelMap::Internal diff --git a/pennylane_lightning/src/simulator/KernelMap.hpp b/pennylane_lightning/src/simulator/KernelMap.hpp new file mode 100644 index 0000000000..51532b9c8c --- /dev/null +++ b/pennylane_lightning/src/simulator/KernelMap.hpp @@ -0,0 +1,282 @@ +// Copyright 2022 Xanadu Quantum Technologies Inc. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+/** + * @file + * Set/get Default kernels for statevector + */ +#include "DispatchKeys.hpp" +#include "GateOperation.hpp" +#include "IntegerInterval.hpp" +#include "KernelType.hpp" +#include "Util.hpp" + +#include +#include +#include +#include +#include + +namespace Pennylane::KernelMap { +///@cond DEV +namespace Internal { + +int assignDefaultKernelsForGateOp(); +int assignDefaultKernelsForGeneratorOp(); +int assignDefaultKernelsForMatrixOp(); + +template struct AssignKernelForOp; + +template <> struct AssignKernelForOp { + static inline const int dummy = assignDefaultKernelsForGateOp(); +}; +template <> struct AssignKernelForOp { + static inline const int dummy = assignDefaultKernelsForGeneratorOp(); +}; +template <> struct AssignKernelForOp { + static inline const int dummy = assignDefaultKernelsForMatrixOp(); +}; +} // namespace Internal +///@endcond + +///@cond DEV +struct DispatchElement { + uint32_t priority; + Util::IntegerInterval interval; + Gates::KernelType kernel; +}; + +inline bool lower_priority(const DispatchElement &lhs, + const DispatchElement &rhs) { + return lhs.priority < rhs.priority; +} + +inline bool higher_priority(const DispatchElement &lhs, + const DispatchElement &rhs) { + return lhs.priority > rhs.priority; +} + +/** + * @brief Maintain dispatch element using a vector decreasingly-ordered by + * priority. 
+ */ +class PriorityDispatchSet { + private: + std::vector ordered_vec_; + + public: + [[nodiscard]] bool + conflict(uint32_t test_priority, + const Util::IntegerInterval &test_interval) const { + const auto test_elt = DispatchElement{test_priority, test_interval, + Gates::KernelType::None}; + const auto [b, e] = + std::equal_range(ordered_vec_.begin(), ordered_vec_.end(), test_elt, + higher_priority); + for (auto iter = b; iter != e; ++iter) { + if (!is_disjoint(iter->interval, test_interval)) { + return true; + } + } + return false; + } + + void insert(const DispatchElement &elt) { + const auto iter_to_insert = std::upper_bound( + ordered_vec_.begin(), ordered_vec_.end(), elt, &higher_priority); + ordered_vec_.insert(iter_to_insert, elt); + } + + template void emplace(Ts &&...args) { + const auto elt = DispatchElement{std::forward(args)...}; + const auto iter_to_insert = std::upper_bound( + ordered_vec_.begin(), ordered_vec_.end(), elt, &higher_priority); + ordered_vec_.insert(iter_to_insert, elt); + } + + [[nodiscard]] Gates::KernelType getKernel(size_t num_qubits) const { + for (const auto &elt : ordered_vec_) { + if (elt.interval(num_qubits)) { + return elt.kernel; + } + } + throw std::range_error( + "Cannot find a kernel for the given number of qubits."); + } + + void clearPriority(uint32_t remove_priority) { + const auto begin = std::lower_bound( + ordered_vec_.begin(), ordered_vec_.end(), remove_priority, + [](const auto &elt, uint32_t p) { return elt.priority > p; }); + const auto end = std::upper_bound( + ordered_vec_.begin(), ordered_vec_.end(), remove_priority, + [](uint32_t p, const auto &elt) { return p > elt.priority; }); + ordered_vec_.erase(begin, end); + } +}; + +///@endcond + +struct AllThreading {}; +struct AllMemoryModel {}; + +constexpr static AllThreading all_threading{}; +constexpr static AllMemoryModel all_memory_model{}; + +/** + * @brief This class manages all data related to kernel map statevector uses. 
+ * + * For a given number of qubit, threading, and memory model, this class + * returns the best kernels for each gate/generator/matrix operation. + */ +template class OperationKernelMap { + public: + using EnumDispatchKernalMap = + std::unordered_map, + PriorityDispatchSet, Util::PairHash>; + using EnumKernelMap = std::unordered_map; + + private: + EnumDispatchKernalMap kernel_map_; + mutable std::deque> cache_; + + /** + * @brief Allowed kernels for a given memory model + */ + const std::unordered_map> + allowed_kernels_; + + OperationKernelMap() + : allowed_kernels_{ + {CPUMemoryModel::Unaligned, + {Gates::KernelType::LM, Gates::KernelType::PI}}, + {CPUMemoryModel::Aligned256, + {Gates::KernelType::LM, Gates::KernelType::PI}}, + {CPUMemoryModel::Aligned512, + {Gates::KernelType::LM, Gates::KernelType::PI}}, + } {} + + public: + static auto getInstance() -> OperationKernelMap & { + static OperationKernelMap instance; + + return instance; + } + + void assignKernelForOp(Operation op, Threading threading, + CPUMemoryModel memory_model, uint32_t priority, + const Util::IntegerInterval &interval, + Gates::KernelType kernel) { + if (std::find(allowed_kernels_.at(memory_model).cbegin(), + allowed_kernels_.at(memory_model).cend(), + kernel) == allowed_kernels_.at(memory_model).cend()) { + throw std::invalid_argument("The given kernel is not allowed for " + "the given memory model."); + } + const auto dispatch_key = toDispatchKey(threading, memory_model); + auto &set = kernel_map_[std::make_pair(op, dispatch_key)]; + + if (set.conflict(priority, interval)) { + throw std::invalid_argument("The given interval conflicts with " + "existing intervals."); + } + + // Reset cache: cached maps may predate this assignment + cache_.clear(); + + set.emplace(priority, interval, kernel); + } + + void assignKernelForOp(Operation op, [[maybe_unused]] AllThreading dummy, + CPUMemoryModel memory_model, + const Util::IntegerInterval &interval, + Gates::KernelType kernel) { + /* Priority for all threading is 1 */ +
Util::for_each_enum([=](Threading threading) { + assignKernelForOp(op, threading, memory_model, 1, interval, kernel); + }); + } + + void assignKernelForOp(Operation op, Threading threading, + [[maybe_unused]] AllMemoryModel dummy, + const Util::IntegerInterval &interval, + Gates::KernelType kernel) { + /* Priority for all memory model is 2 */ + Util::for_each_enum([=](CPUMemoryModel memory_model) { + assignKernelForOp(op, threading, memory_model, 2, interval, kernel); + }); + } + + void assignKernelForOp(Operation op, [[maybe_unused]] AllThreading dummy1, + [[maybe_unused]] AllMemoryModel dummy2, + const Util::IntegerInterval &interval, + Gates::KernelType kernel) { + /* Priority is 0 */ + Util::for_each_enum( + [=](Threading threading, CPUMemoryModel memory_model) { + assignKernelForOp(op, threading, memory_model, 0, interval, + kernel); + }); + } + + void removeKernelForOp(Operation op, Threading threading, + CPUMemoryModel memory_model, uint32_t priority) { + uint32_t dispatch_key = toDispatchKey(threading, memory_model); + const auto key = std::make_pair(op, dispatch_key); + + const auto iter = kernel_map_.find(key); + if (iter == kernel_map_.end()) { + return; + } + (iter->second).clearPriority(priority); + + // Reset cache + cache_.clear(); + } + + /** + * @brief Create map contains default kernels for operation + * + * @param num_qubits Number of qubits + * @param threading Threading context + * @param memory_model Memory model of the underlying data + */ + [[nodiscard]] auto getKernelMap(size_t num_qubits, Threading threading, + CPUMemoryModel memory_model) const + -> EnumKernelMap { + // Add mutex for cache_ when we goto multithread. 
+ const uint32_t dispatch_key = toDispatchKey(threading, memory_model); + + const auto cache_iter = + std::find_if(cache_.begin(), cache_.end(), [=](const auto &elt) { + return (std::get<0>(elt) == num_qubits) && + (std::get<1>(elt) == dispatch_key); + }); + if (cache_iter == cache_.end()) { + std::unordered_map kernel_for_op; + + Util::for_each_enum([&](Operation op) { + const auto key = std::make_pair(op, dispatch_key); + const auto &set = kernel_map_.at(key); + kernel_for_op.emplace(op, set.getKernel(num_qubits)); + }); + if (cache_.size() == cache_size) { + cache_.pop_front(); + } + cache_.emplace_back(num_qubits, dispatch_key, kernel_for_op); + return kernel_for_op; + } + return std::get<2>(*cache_iter); + } +}; +} // namespace Pennylane::KernelMap diff --git a/pennylane_lightning/src/simulator/StateVectorCPU.hpp b/pennylane_lightning/src/simulator/StateVectorCPU.hpp index e0f944ad25..ab5d55a800 100644 --- a/pennylane_lightning/src/simulator/StateVectorCPU.hpp +++ b/pennylane_lightning/src/simulator/StateVectorCPU.hpp @@ -11,9 +11,9 @@ #pragma once #include "BitUtil.hpp" -#include "DefaultKernelsForStateVector.hpp" #include "DispatchKeys.hpp" #include "Gates.hpp" +#include "KernelMap.hpp" #include "KernelType.hpp" #include "Memory.hpp" #include "StateVectorBase.hpp" @@ -48,13 +48,16 @@ class StateVectorCPU : public StateVectorBase { void setKernels(size_t num_qubits, Threading threading, CPUMemoryModel memory_model) { - auto &default_kernels = DefaultKernelsForStateVector::getInstance(); - kernel_for_gates_ = default_kernels.getGateKernelMap( - num_qubits, threading, memory_model); - kernel_for_generators_ = default_kernels.getGeneratorKernelMap( - num_qubits, threading, memory_model); - kernel_for_matrices_ = default_kernels.getMatrixKernelMap( - num_qubits, threading, memory_model); + using KernelMap::OperationKernelMap; + kernel_for_gates_ = + OperationKernelMap::getInstance() + .getKernelMap(num_qubits, threading, memory_model); + kernel_for_generators_ = 
+ OperationKernelMap::getInstance() + .getKernelMap(num_qubits, threading, memory_model); + kernel_for_matrices_ = + OperationKernelMap::getInstance() + .getKernelMap(num_qubits, threading, memory_model); } protected: diff --git a/pennylane_lightning/src/tests/CMakeLists.txt b/pennylane_lightning/src/tests/CMakeLists.txt index 64ebd3a39d..b5cceb02be 100644 --- a/pennylane_lightning/src/tests/CMakeLists.txt +++ b/pennylane_lightning/src/tests/CMakeLists.txt @@ -71,7 +71,6 @@ target_link_libraries(compile_time_tests lightning_gates lightning_utils) set(TEST_SOURCES CreateAllWires.cpp Test_AdjDiff.cpp Test_DynamicDispatcher.cpp - Test_DefaultKernelsForStateVector.cpp Test_GateImplementations_CompareKernels.cpp Test_GateImplementations_Generator.cpp Test_GateImplementations_Inverse.cpp @@ -80,6 +79,7 @@ set(TEST_SOURCES CreateAllWires.cpp Test_GateImplementations_Param.cpp Test_GateUtil.cpp Test_Internal.cpp + Test_KernelMap.cpp Test_Measures.cpp Test_OpToMemberFuncPtr.cpp Test_StateVectorCPU.cpp diff --git a/pennylane_lightning/src/tests/Test_DefaultKernelsForStateVector.cpp b/pennylane_lightning/src/tests/Test_KernelMap.cpp similarity index 50% rename from pennylane_lightning/src/tests/Test_DefaultKernelsForStateVector.cpp rename to pennylane_lightning/src/tests/Test_KernelMap.cpp index 32b9d0a8f4..39c5daa024 100644 --- a/pennylane_lightning/src/tests/Test_DefaultKernelsForStateVector.cpp +++ b/pennylane_lightning/src/tests/Test_KernelMap.cpp @@ -1,55 +1,67 @@ #include "Constant.hpp" #include "ConstantUtil.hpp" -#include "DefaultKernelsForStateVector.hpp" +#include "KernelMap.hpp" #include "Util.hpp" #include using namespace Pennylane; +using namespace Pennylane::KernelMap; TEST_CASE("Test default kernels for gates are well defined", - "[Test_DefaultKernelsForStateVector]") { - auto &instance = DefaultKernelsForStateVector::getInstance(); + "[Test_KernelMap]") { + auto &instance = OperationKernelMap::getInstance(); Util::for_each_enum( [&instance](Threading 
threading, CPUMemoryModel memory_model) { for (size_t num_qubits = 1; num_qubits < 27; num_qubits++) { - REQUIRE_NOTHROW(instance.getGateKernelMap(num_qubits, threading, - memory_model)); + REQUIRE_NOTHROW( + instance.getKernelMap(num_qubits, threading, memory_model)); } }); } TEST_CASE("Test default kernels for generators are well defined", - "[Test_DefaultKernelsForStateVector]") { - auto &instance = DefaultKernelsForStateVector::getInstance(); + "[Test_KernelMap]") { + auto &instance = + OperationKernelMap::getInstance(); Util::for_each_enum( [&instance](Threading threading, CPUMemoryModel memory_model) { for (size_t num_qubits = 1; num_qubits < 27; num_qubits++) { - REQUIRE_NOTHROW(instance.getGeneratorKernelMap( - num_qubits, threading, memory_model)); + REQUIRE_NOTHROW( + instance.getKernelMap(num_qubits, threading, memory_model)); } }); } -TEST_CASE("Test unallowed kernel", "[Test_DefaultKernelsForStateVector]") { +TEST_CASE("Test default kernels for matrix operation are well defined", + "[Test_KernelMap]") { + auto &instance = OperationKernelMap::getInstance(); + Util::for_each_enum( + [&instance](Threading threading, CPUMemoryModel memory_model) { + for (size_t num_qubits = 1; num_qubits < 27; num_qubits++) { + REQUIRE_NOTHROW( + instance.getKernelMap(num_qubits, threading, memory_model)); + } + }); +} + +TEST_CASE("Test unallowed kernel", "[Test_KernelMap]") { using Gates::GateOperation; - using Gates::GeneratorOperation; using Gates::KernelType; - auto &instance = DefaultKernelsForStateVector::getInstance(); - REQUIRE_THROWS(instance.assignKernelForGate( + auto &instance = OperationKernelMap::getInstance(); + REQUIRE_THROWS(instance.assignKernelForOp( GateOperation::PauliX, Threading::SingleThread, CPUMemoryModel::Unaligned, 0, Util::full_domain(), KernelType::None)); } -TEST_CASE("Test few limiting cases of default kernels", - "[Test_DefaultKernelsForStateVector]") { - auto &instance = DefaultKernelsForStateVector::getInstance(); +TEST_CASE("Test few 
limiting cases of default kernels", "[Test_KernelMap]") { + auto &instance = OperationKernelMap::getInstance(); SECTION("Single thread, large number of qubits") { // For large N, single thread calls "LM" for all single- and two-qubit // gates. For three-qubit gates, we use PI. - auto gate_map = instance.getGateKernelMap(24, Threading::SingleThread, - CPUMemoryModel::Unaligned); + auto gate_map = instance.getKernelMap(24, Threading::SingleThread, + CPUMemoryModel::Unaligned); Util::for_each_enum( [&gate_map](Gates::GateOperation gate_op) { INFO(Util::lookup(Gates::Constant::gate_names, gate_op)); @@ -65,37 +77,36 @@ TEST_CASE("Test few limiting cases of default kernels", } SECTION("Single thread, N = 14") { // For large N = 14, IsingXX with "PI" is slightly faster - auto gate_map = instance.getGateKernelMap(14, Threading::SingleThread, - CPUMemoryModel::Unaligned); + auto gate_map = instance.getKernelMap(14, Threading::SingleThread, + CPUMemoryModel::Unaligned); REQUIRE(gate_map[Gates::GateOperation::IsingXX] == Gates::KernelType::PI); } } -TEST_CASE("Test priority works", "[Test_DefaultKernelsForStateVector]") { +TEST_CASE("Test priority works", "[Test_KernelMap]") { using Gates::GateOperation; - using Gates::GeneratorOperation; using Gates::KernelType; - auto &instance = DefaultKernelsForStateVector::getInstance(); + auto &instance = OperationKernelMap::getInstance(); SECTION("Test assignKernelForGate") { - auto original_kernel = instance.getGateKernelMap( + auto original_kernel = instance.getKernelMap( 24, Threading::SingleThread, CPUMemoryModel::Unaligned)[GateOperation::PauliX]; - instance.assignKernelForGate( - GateOperation::PauliX, Threading::SingleThread, - CPUMemoryModel::Unaligned, 100, Util::full_domain(), - KernelType::PI); + instance.assignKernelForOp(GateOperation::PauliX, + Threading::SingleThread, + CPUMemoryModel::Unaligned, 100, + Util::full_domain(), KernelType::PI); - REQUIRE(instance.getGateKernelMap( + REQUIRE(instance.getKernelMap( 24, 
Threading::SingleThread, CPUMemoryModel::Unaligned)[GateOperation::PauliX] == KernelType::PI); - instance.removeKernelForGate(GateOperation::PauliX, - Threading::SingleThread, - CPUMemoryModel::Unaligned, 100); - REQUIRE(instance.getGateKernelMap( + instance.removeKernelForOp(GateOperation::PauliX, + Threading::SingleThread, + CPUMemoryModel::Unaligned, 100); + REQUIRE(instance.getKernelMap( 24, Threading::SingleThread, CPUMemoryModel::Unaligned)[GateOperation::PauliX] == original_kernel); diff --git a/pennylane_lightning/src/util/IntegerInterval.hpp b/pennylane_lightning/src/util/IntegerInterval.hpp index 24f14959b9..06002ca3b0 100644 --- a/pennylane_lightning/src/util/IntegerInterval.hpp +++ b/pennylane_lightning/src/util/IntegerInterval.hpp @@ -70,7 +70,7 @@ auto in_between_closed(IntegerType from, IntegerType to) return IntegerInterval{from, to + 1}; } template -auto full_domain() -> IntegerInterval { +constexpr auto full_domain() -> IntegerInterval { return IntegerInterval{ 0, std::numeric_limits::max()}; } From e51ffb59da1bcb717db6d266981bf40840a4f231 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Thu, 3 Mar 2022 17:56:12 -0500 Subject: [PATCH 33/94] Fix for mac --- pennylane_lightning/src/util/Memory.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/pennylane_lightning/src/util/Memory.hpp b/pennylane_lightning/src/util/Memory.hpp index e78923b64d..cb271da036 100644 --- a/pennylane_lightning/src/util/Memory.hpp +++ b/pennylane_lightning/src/util/Memory.hpp @@ -10,6 +10,7 @@ // limitations under the License. 
#pragma once +#include #include #include #include From 0c991d4be92e86d7eee4c958b1e4ce706fb67ff2 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Fri, 4 Mar 2022 09:21:43 -0500 Subject: [PATCH 34/94] Simplified a bit --- .github/workflows/format.yml | 2 +- pennylane_lightning/src/algorithms/JacobianTape.hpp | 6 +++--- pennylane_lightning/src/simulator/StateVectorManagedCPU.hpp | 2 -- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml index c7688703ca..b4a91f94a5 100644 --- a/.github/workflows/format.yml +++ b/.github/workflows/format.yml @@ -42,7 +42,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v2 with: - python-version: 3.8 + python-version: 3.7 - name: Install dependencies run: sudo apt update && sudo apt -y install clang-tidy-12 cmake g++ libomp-12-dev diff --git a/pennylane_lightning/src/algorithms/JacobianTape.hpp b/pennylane_lightning/src/algorithms/JacobianTape.hpp index 8a33e89f02..ca7d0ac6f7 100644 --- a/pennylane_lightning/src/algorithms/JacobianTape.hpp +++ b/pennylane_lightning/src/algorithms/JacobianTape.hpp @@ -86,9 +86,9 @@ template class ObsDatum { } private: - const std::vector obs_name_{}; - const std::vector obs_params_{}; - const std::vector> obs_wires_{}; + const std::vector obs_name_; + const std::vector obs_params_; + const std::vector> obs_wires_; }; /** diff --git a/pennylane_lightning/src/simulator/StateVectorManagedCPU.hpp b/pennylane_lightning/src/simulator/StateVectorManagedCPU.hpp index ad21a48134..9b1e5d1630 100644 --- a/pennylane_lightning/src/simulator/StateVectorManagedCPU.hpp +++ b/pennylane_lightning/src/simulator/StateVectorManagedCPU.hpp @@ -37,7 +37,6 @@ class StateVectorManagedCPU private: using BaseType = StateVectorCPU; - // NOLINTNEXTLINE(modernize-avoid-c-arrays,hicpp-avoid-c-arrays) std::vector> data_; public: @@ -71,7 +70,6 @@ class StateVectorManagedCPU // Clang-tidy gives false positive for delegating constructor template - // 
NOLINTNEXTLINE(hicpp-member-init) explicit StateVectorManagedCPU( const std::vector, Alloc> &rhs, Threading threading = bestThreading(), From 4b6f82d5bfb831661207a90fe68c7cec1845afdc Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Sat, 5 Mar 2022 21:03:49 -0500 Subject: [PATCH 35/94] Add runtime/compiletime info --- bin/cpp-files | 7 +- bin/utils.py | 15 ++- cmake/process_options.cmake | 14 +++ doc/_ext/edit_on_github.py | 27 ++--- doc/conf.py | 94 +++++++-------- doc/directives.py | 28 ++--- .../src/algorithms/AdjointDiff.hpp | 9 +- pennylane_lightning/src/bindings/Bindings.cpp | 6 + pennylane_lightning/src/bindings/Bindings.hpp | 109 +++++++++++++++++- pennylane_lightning/src/gates/KernelType.hpp | 8 -- .../src/simulator/CPUMemoryModel.hpp | 21 +++- .../src/simulator/DynamicDispatcher.hpp | 2 + .../src/simulator/KernelMap.hpp | 49 +++++++- .../src/simulator/StateVectorCPU.hpp | 38 ++++++ .../src/simulator/StateVectorManagedCPU.hpp | 11 ++ pennylane_lightning/src/tests/CMakeLists.txt | 1 + .../src/tests/TestAvailableKernels.hpp | 17 --- ...est_GateImplementations_CompareKernels.cpp | 19 +-- .../src/tests/Test_RuntimeInfo.cpp | 13 +++ .../src/tests/Test_StateVectorCPU.cpp | 1 + pennylane_lightning/src/util/BitUtil.hpp | 3 +- pennylane_lightning/src/util/CMakeLists.txt | 8 +- pennylane_lightning/src/util/Macros.hpp | 77 ++++++++++++- pennylane_lightning/src/util/Memory.hpp | 71 ++++++++++-- pennylane_lightning/src/util/RuntimeInfo.cpp | 68 +++++++++++ pennylane_lightning/src/util/RuntimeInfo.hpp | 52 +++++++++ pennylane_lightning/src/util/TypeList.hpp | 22 +++- 27 files changed, 640 insertions(+), 150 deletions(-) create mode 100644 pennylane_lightning/src/tests/Test_RuntimeInfo.cpp create mode 100644 pennylane_lightning/src/util/RuntimeInfo.cpp create mode 100644 pennylane_lightning/src/util/RuntimeInfo.hpp diff --git a/bin/cpp-files b/bin/cpp-files index b09cc88cf1..7ccd202783 100755 --- a/bin/cpp-files +++ b/bin/cpp-files @@ -14,6 +14,9 @@ if __name__ == 
'__main__': parser = argparse.ArgumentParser( description="Output C/C++ files in json list" ) + parser.add_argument( + "--header-only", action='store_true', dest='header_only', help="whether only include header files" + ) parser.add_argument( "paths", nargs="+", metavar="DIR", help="paths to the root source directories" ) @@ -23,9 +26,9 @@ if __name__ == '__main__': args = parser.parse_args() - files = set(get_cpp_files(args.paths)) + files = set(get_cpp_files(args.paths, header_only = args.header_only)) if args.exclude_dirs: - files_excludes = set(get_cpp_files(args.exclude_dirs)) + files_excludes = set(get_cpp_files(args.exclude_dirs, header_only = args.header_only)) files -= files_excludes json.dump(list(files), sys.stdout) diff --git a/bin/utils.py b/bin/utils.py index 90d1693031..6d9dab9420 100644 --- a/bin/utils.py +++ b/bin/utils.py @@ -2,13 +2,14 @@ import re import fnmatch -SRCFILE_EXT = ("c", "cc", "cpp", "cxx", "h", "hh", "hpp", "hxx", "cu", "cuh") +SRCFILE_EXT = ["c", "cc", "cpp", "cxx", "cu"] +HEADERFILE_EXT = ["h", "hh", "hpp", "hxx", "cuh"] LIGHTNING_SOURCE_DIR = Path(__file__).resolve().parent.parent rgx_gitignore_comment = re.compile("#.*$") -def get_cpp_files_from_path(path, ignore_patterns = None, use_gitignore = True): +def get_cpp_files_from_path(path, ignore_patterns = None, use_gitignore = True, header_only = False): """return set of C++ source files from a path Args: @@ -18,7 +19,11 @@ def get_cpp_files_from_path(path, ignore_patterns = None, use_gitignore = True): """ path = Path(path) files_rel = set() # file paths relative to path - for ext in SRCFILE_EXT: + + exts = HEADERFILE_EXT + if not header_only: + exts += SRCFILE_EXT + for ext in exts: for file_path in path.rglob(f"*.{ext}"): files_rel.add(file_path.relative_to(path)) @@ -46,7 +51,7 @@ def get_cpp_files_from_path(path, ignore_patterns = None, use_gitignore = True): return set(str(path.joinpath(f)) for f in files_rel) -def get_cpp_files(paths, ignore_patterns = None, use_gitignore 
= True): +def get_cpp_files(paths, ignore_patterns = None, use_gitignore = True, header_only = False): """return list of C++ source files from paths. Args: @@ -56,5 +61,5 @@ def get_cpp_files(paths, ignore_patterns = None, use_gitignore = True): """ files = set() for path in paths: - files |= get_cpp_files_from_path(path, ignore_patterns, use_gitignore) + files |= get_cpp_files_from_path(path, ignore_patterns, use_gitignore, header_only) return list(files) diff --git a/cmake/process_options.cmake b/cmake/process_options.cmake index d3ecccd3f9..815a04e43d 100644 --- a/cmake/process_options.cmake +++ b/cmake/process_options.cmake @@ -62,6 +62,20 @@ else() message(STATUS "ENABLE_AVX is OFF") endif() +if(ENABLE_AVX2) + message(STATUS "ENABLE_AVX2 is ON.") + target_compile_options(lightning_compile_options INTERFACE -mavx2) +else() + message(STATUS "ENABLE_AVX2 is OFF") +endif() + +if(ENABLE_AVX512) + message(STATUS "ENABLE_AVX512 is ON.") + target_compile_options(lightning_compile_options INTERFACE -mavx512f) # Now we only use avx512f +else() + message(STATUS "ENABLE_AVX512 is OFF") +endif() + if(ENABLE_OPENMP) message(STATUS "ENABLE_OPENMP is ON.") find_package(OpenMP) diff --git a/doc/_ext/edit_on_github.py b/doc/_ext/edit_on_github.py index b69348d97d..954ed00ab4 100644 --- a/doc/_ext/edit_on_github.py +++ b/doc/_ext/edit_on_github.py @@ -8,19 +8,20 @@ import warnings -__licence__ = 'BSD (3 clause)' +__licence__ = "BSD (3 clause)" def get_github_url(app, view, path): - return 'https://github.com/{project}/{view}/{branch}/{path}'.format( + return "https://github.com/{project}/{view}/{branch}/{path}".format( project=app.config.edit_on_github_project, view=view, branch=app.config.edit_on_github_branch, - path=path) + path=path, + ) def html_page_context(app, pagename, templatename, context, doctree): - if templatename != 'page.html': + if templatename != "page.html": return if not app.config.edit_on_github_project: @@ -29,16 +30,16 @@ def html_page_context(app, 
pagename, templatename, context, doctree): if not doctree: return - - path = os.path.relpath(doctree.get('source'), app.builder.srcdir) - show_url = get_github_url(app, 'blob', path) - edit_url = get_github_url(app, 'edit', path) - context['show_on_github_url'] = show_url - context['edit_on_github_url'] = edit_url + path = os.path.relpath(doctree.get("source"), app.builder.srcdir) + show_url = get_github_url(app, "blob", path) + edit_url = get_github_url(app, "edit", path) + + context["show_on_github_url"] = show_url + context["edit_on_github_url"] = edit_url def setup(app): - app.add_config_value('edit_on_github_project', '', True) - app.add_config_value('edit_on_github_branch', 'master', True) - app.connect('html-page-context', html_page_context) \ No newline at end of file + app.add_config_value("edit_on_github_project", "", True) + app.add_config_value("edit_on_github_branch", "master", True) + app.connect("html-page-context", html_page_context) diff --git a/doc/conf.py b/doc/conf.py index 770f5434dc..37a6be4452 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -20,46 +20,50 @@ # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. 
-sys.path.insert(0, os.path.abspath('')) -sys.path.insert(0, os.path.abspath('_ext')) -sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath('doc')), 'doc')) +sys.path.insert(0, os.path.abspath("")) +sys.path.insert(0, os.path.abspath("_ext")) +sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath("doc")), "doc")) # For obtaining all relevant C++ source files -currdir = Path(__file__).resolve().parent # PROJECT_SOURCE_DIR/docs +currdir = Path(__file__).resolve().parent # PROJECT_SOURCE_DIR/docs PROJECT_SOURCE_DIR = currdir.parent -CPP_SOURCE_DIR = PROJECT_SOURCE_DIR.joinpath('pennylane_lightning/src') -CPP_EXCLUDE_DIRS = ['examples', 'tests'] # relative to CPP_SOURCE_DIR +CPP_SOURCE_DIR = PROJECT_SOURCE_DIR.joinpath("pennylane_lightning/src") +CPP_EXCLUDE_DIRS = ["examples", "tests"] # relative to CPP_SOURCE_DIR + def obtain_cpp_files(): - script_path = PROJECT_SOURCE_DIR.joinpath('bin/cpp-files') + script_path = PROJECT_SOURCE_DIR.joinpath("bin/cpp-files") if not script_path.exists(): - print('The project directory structure is corrupted.') + print("The project directory structure is corrupted.") sys.exit(1) exclude_dirs = [CPP_SOURCE_DIR.joinpath(exclude_dir) for exclude_dir in CPP_EXCLUDE_DIRS] - p = subprocess.run([str(script_path), CPP_SOURCE_DIR, '--exclude-dirs', *exclude_dirs], capture_output = True) + p = subprocess.run( + [str(script_path), "--header-only", CPP_SOURCE_DIR, "--exclude-dirs", *exclude_dirs], + capture_output=True, + ) file_list = json.loads(p.stdout) - file_list = ['../' + str(Path(f).relative_to(PROJECT_SOURCE_DIR)) for f in file_list] + file_list = ["../" + str(Path(f).relative_to(PROJECT_SOURCE_DIR)) for f in file_list] return file_list + CPP_FILES = obtain_cpp_files() print(CPP_FILES) - class Mock(MagicMock): - __name__ = 'foo' + __name__ = "foo" @classmethod def __getattr__(cls, name): return MagicMock() -MOCK_MODULES = ['pennylane_lightning.lightning_qubit_ops'] +MOCK_MODULES = 
["pennylane_lightning.lightning_qubit_ops"] mock = Mock() for mod_name in MOCK_MODULES: @@ -68,7 +72,7 @@ def __getattr__(cls, name): # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. -needs_sphinx = '1.6' +needs_sphinx = "1.6" # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom @@ -85,9 +89,9 @@ def __getattr__(cls, name): "sphinx.ext.mathjax", "sphinx.ext.napoleon", "sphinx.ext.todo", - 'sphinx.ext.viewcode', + "sphinx.ext.viewcode", "sphinx_automodapi.automodapi", - 'sphinx_automodapi.smart_resolver' + "sphinx_automodapi.smart_resolver", ] intersphinx_mapping = {"https://pennylane.readthedocs.io/en/stable/": None} @@ -114,10 +118,7 @@ def __getattr__(cls, name): # TIP: if using the sphinx-bootstrap-theme, you need # "treeViewIsBootstrap": True, "exhaleExecutesDoxygen": True, - "exhaleDoxygenStdin": ( - "INPUT = " + ' '.join(CPP_FILES) + ' ' - "EXCLUDE_SYMBOLS = std::* " - ), + "exhaleDoxygenStdin": ("INPUT = " + " ".join(CPP_FILES) + " " "EXCLUDE_SYMBOLS = std::* "), "afterTitleDescription": inspect.cleandoc( """ The Pennylane Lightning C++ API is intended to be called from Python through Pybind11. Direct use of the C++ API is currently unsupported and is provided for reference only. @@ -126,21 +127,21 @@ def __getattr__(cls, name): } # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates', 'xanadu_theme'] +templates_path = ["_templates", "xanadu_theme"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] -source_suffix = '.rst' +source_suffix = ".rst" # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. 
-project = 'PennyLane-Lightning' +project = "PennyLane-Lightning" copyright = "Copyright 2021" -author = 'Xanadu Inc.' +author = "Xanadu Inc." add_module_names = False @@ -149,11 +150,12 @@ def __getattr__(cls, name): # built documents. import pennylane_lightning + # The full version, including alpha/beta/rc tags. release = pennylane_lightning.__version__ # The short X.Y version. -version = re.match(r'^(\d+\.\d+)', release).expand(r'\1') +version = re.match(r"^(\d+\.\d+)", release).expand(r"\1") # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -163,19 +165,19 @@ def __getattr__(cls, name): language = None # today_fmt is used as the format for a strftime call. -today_fmt = '%Y-%m-%d' +today_fmt = "%Y-%m-%d" # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This patterns also effect to html_static_path and html_extra_path -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. show_authors = True # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = True @@ -186,12 +188,12 @@ def __getattr__(cls, name): # The name of an image file (relative to this directory) to use as a favicon of # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -html_favicon = '_static/favicon.ico' +html_favicon = "_static/favicon.ico" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". 
-html_static_path = ['_static'] +html_static_path = ["_static"] # Custom sidebar templates, must be a dictionary that maps document names # to template names. @@ -199,26 +201,24 @@ def __getattr__(cls, name): # This is required for the alabaster theme # refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars html_sidebars = { - '**' : [ - 'logo-text.html', - 'searchbox.html', - 'globaltoc.html', + "**": [ + "logo-text.html", + "searchbox.html", + "globaltoc.html", ] } # -- Xanadu theme --------------------------------------------------------- -html_theme = 'xanadu_theme' -html_theme_path = ['.'] +html_theme = "xanadu_theme" +html_theme_path = ["."] # xanadu theme options (see theme.conf for more information) html_theme_options = { # Set the name of the project to appear in the left sidebar. "project_nav_name": "PennyLane-Lightning", - # Path to a touch icon "touch_icon": "logo_new.png", - "large_toc": True, "navigation_button": "#19b37b", "navigation_button_hover": "#0e714d", @@ -229,22 +229,22 @@ def __getattr__(cls, name): "download_button": "#19b37b", } -edit_on_github_project = 'XanaduAI/pennylane-lightning' -edit_on_github_branch = 'master/doc' +edit_on_github_project = "XanaduAI/pennylane-lightning" +edit_on_github_branch = "master/doc" -#============================================================ +# ============================================================ # the order in which autodoc lists the documented members -autodoc_member_order = 'bysource' +autodoc_member_order = "bysource" # inheritance_diagram graphviz attributes -inheritance_node_attrs = dict(color='lightskyblue1', style='filled') +inheritance_node_attrs = dict(color="lightskyblue1", style="filled") -#autodoc_default_flags = ['members'] +# autodoc_default_flags = ['members'] autosummary_generate = True from directives import CustomDeviceGalleryItemDirective -def setup(app): - app.add_directive('devicegalleryitem', CustomDeviceGalleryItemDirective) +def setup(app): + 
app.add_directive("devicegalleryitem", CustomDeviceGalleryItemDirective) diff --git a/doc/directives.py b/doc/directives.py index 953c5d38ba..3dfe1cc5d7 100644 --- a/doc/directives.py +++ b/doc/directives.py @@ -49,25 +49,27 @@ class CustomDeviceGalleryItemDirective(Directive): required_arguments = 0 optional_arguments = 4 final_argument_whitespace = True - option_spec = {'name': directives.unchanged, - 'description': directives.unchanged, - 'link': directives.unchanged} + option_spec = { + "name": directives.unchanged, + "description": directives.unchanged, + "link": directives.unchanged, + } has_content = False add_index = False def run(self): try: - if 'name' in self.options: - name = self.options['name'] + if "name" in self.options: + name = self.options["name"] - if 'description' in self.options: - description = self.options['description'] + if "description" in self.options: + description = self.options["description"] else: - raise ValueError('description not found') + raise ValueError("description not found") - if 'link' in self.options: - link = self.options['link'] + if "link" in self.options: + link = self.options["link"] else: link = "code/qml_templates" @@ -79,10 +81,8 @@ def run(self): raise return [] - thumbnail_rst = GALLERY_TEMPLATE.format(name=name, - description=description, - link=link) - thumbnail = StringList(thumbnail_rst.split('\n')) + thumbnail_rst = GALLERY_TEMPLATE.format(name=name, description=description, link=link) + thumbnail = StringList(thumbnail_rst.split("\n")) thumb = nodes.paragraph() self.state.nested_parse(thumbnail, self.content_offset, thumb) return [thumb] diff --git a/pennylane_lightning/src/algorithms/AdjointDiff.hpp b/pennylane_lightning/src/algorithms/AdjointDiff.hpp index fc02f3c50e..d58dad3e7b 100644 --- a/pennylane_lightning/src/algorithms/AdjointDiff.hpp +++ b/pennylane_lightning/src/algorithms/AdjointDiff.hpp @@ -71,8 +71,7 @@ template class AdjointJacobian { size_t param_index) { jac[obs_index][param_index] = -2 * 
scaling_coeff * - std::imag( - innerProdC(sv1.getData(), sv2.getData(), sv1.getLength())); + std::imag(innerProdC(sv1.getDataVector(), sv2.getDataVector())); } /** @@ -397,9 +396,9 @@ template class AdjointJacobian { obs_idx++) { jac[mat_row_idx + obs_idx] = -2 * scalingFactor * - std::imag( - innerProdC(H_lambda[obs_idx].getData(), - mu.getData(), mu.getLength())); + std::imag(innerProdC( + H_lambda[obs_idx].getDataVector(), + mu.getDataVector())); } trainableParamNumber--; ++tp_it; diff --git a/pennylane_lightning/src/bindings/Bindings.cpp b/pennylane_lightning/src/bindings/Bindings.cpp index b935734087..0acaa45d83 100644 --- a/pennylane_lightning/src/bindings/Bindings.cpp +++ b/pennylane_lightning/src/bindings/Bindings.cpp @@ -378,6 +378,12 @@ PYBIND11_MODULE(lightning_qubit_ops, // NOLINT: No control over Pybind internals m.def("best_alignment", &bestCPUMemoryModel, "Best memory alignment. for the simulator."); + /* Add compile info */ + m.def("compile_info", &getCompileInfo, "Compiled binary information."); + + /* Add compile info */ + m.def("runtime_info", &getRuntimeInfo, "Runtime information."); + lightning_class_bindings(m); lightning_class_bindings(m); } diff --git a/pennylane_lightning/src/bindings/Bindings.hpp b/pennylane_lightning/src/bindings/Bindings.hpp index c0e20f5552..fe1bdfa4b3 100644 --- a/pennylane_lightning/src/bindings/Bindings.hpp +++ b/pennylane_lightning/src/bindings/Bindings.hpp @@ -20,9 +20,11 @@ #include "AdjointDiff.hpp" #include "CPUMemoryModel.hpp" #include "JacobianProd.hpp" +#include "Macros.hpp" #include "Measures.hpp" #include "Memory.hpp" #include "OpToMemberFuncPtr.hpp" +#include "RuntimeInfo.hpp" #include "StateVectorManagedCPU.hpp" #include "pybind11/complex.h" @@ -40,11 +42,12 @@ namespace Pennylane { /** - * @brief Create a `%StateVector` object from a 1D numpy complex data array. + * @brief Create a @ref Pennylane::StateVectorRawCPU object from a 1D numpy + * complex data array. 
* * @tparam PrecisionT Precision data type * @param numpyArray Numpy data array. - * @return StateVector `%StateVector` object. + * @return StateVectorRawCPU object. */ template auto createRaw(const pybind11::array_t> &numpyArray) @@ -65,6 +68,14 @@ auto createRaw(const pybind11::array_t> &numpyArray) {data_ptr, static_cast(numpyArrayInfo.shape[0])}); } +/** + * @brief Create a StateVectorManagedCPU object from a 1D numpy array + * by copying the internal data. + * + * @tparam PrecisionT Floating point precision type + * @param numpyArray Numpy array data-type + * @return StateVectorManagedCPU object. + */ template auto createManaged( const pybind11::array_t> &numpyArray) @@ -85,6 +96,14 @@ auto createManaged( {data_ptr, static_cast(numpyArrayInfo.shape[0])}); } +/** + * @brief Create numpy array view for the underlying data of + * `%StateVectorManagedCPU` object. + * + * @tparam PrecisionT Floating point data type + * @param sv `%StateVectorManagedCPU` object + * @return A numpy array + */ template auto toNumpyArray(const StateVectorManagedCPU &sv) -> pybind11::array_t> { @@ -92,11 +111,26 @@ auto toNumpyArray(const StateVectorManagedCPU &sv) {sv.getLength()}, {2 * sizeof(PrecisionT)}, sv.getData()); } +/** + * @brief Get memory alignment of a given numpy array. + * + * @param NumpyArray Pybind11's numpy array type. + * @return Memory model describing alignment + */ auto getNumpyArrayAlignment(const pybind11::array &numpyArray) -> CPUMemoryModel { return getMemoryModel(numpyArray.request().ptr); } +/** + * @brief Create an aligned numpy array for a given type, memory model and array + * size. 
+ * + * @tparam T Datatype of numpy array to create + * @param memory_model Memory model to use + * @param size Size of the array to create + * @return Numpy array + */ template auto alignedNumpyArray(CPUMemoryModel memory_model, size_t size) -> pybind11::array { @@ -114,11 +148,14 @@ auto alignedNumpyArray(CPUMemoryModel memory_model, size_t size) } /** - * @brief We return an numpy array whose underlying data is allocated by + * @brief Create a numpy array whose underlying data is allocated by * lightning. * * See https://github.com/pybind/pybind11/issues/1042#issuecomment-325941022 * for capsule usage. + * + * @param size Size of the array to create + * @param dt Pybind11's datatype object */ auto allocateAlignedArray(size_t size, pybind11::dtype dt) -> pybind11::array { auto memory_model = bestCPUMemoryModel(); @@ -138,7 +175,7 @@ auto allocateAlignedArray(size_t size, pybind11::dtype dt) -> pybind11::array { /** * @brief Apply given list of operations to Numpy data array using C++ - * `%StateVector` class. + * StateVectorRawCPU class. * * @tparam PrecisionT Precision data type * @param stateNumpyArray Complex numpy data array representing statevector. @@ -157,7 +194,16 @@ void apply(pybind11::array_t> &stateNumpyArray, state.applyOperations(ops, wires, inverse, params); } -/// @cond DEV +/** + * @brief Register StateVector class to pybind. + * + * @tparam PrecisionT Floating point type for statevector + * @tparam ParamT Parameter type of gate operations for statevector + * @tparam SVType Statevector type to register + * @tparam Pyclass Pybind11's class object type + * + * @param pyclass Pybind11's class object to bind statevector + */ template void registerGatesForStateVector(PyClass &pyclass) { using Gates::GateOperation; @@ -192,4 +238,57 @@ void registerGatesForStateVector(PyClass &pyclass) { pyclass.def(gate_name.c_str(), func, doc.c_str()); }); } + +/** + * @brief Return basic information of the compiled binary. 
+ */ +auto getCompileInfo() -> pybind11::dict { + using namespace Util::Constant; + using namespace pybind11::literals; + + const std::string_view cpu_arch_str = [] { + switch (cpu_arch) { + case CPUArch::AMD64: + return "AMD64"; + case CPUArch::PPC64: + return "PPC64"; + case CPUArch::ARM: + return "ARM"; + default: + return "Unknown"; + } + }(); + + const std::string_view compiler_name_str = [] { + switch (compiler) { + case Compiler::GCC: + return "GCC"; + case Compiler::Clang: + return "Clang"; + case Compiler::MSVC: + return "MSVC"; + case Compiler::Unknown: + return "Unknown"; + } + }(); + + const auto compiler_version_str = getCompilerVersion(); + + return pybind11::dict("cpu.arch"_a = cpu_arch_str, + "compiler.name"_a = compiler_name_str, + "compiler.version"_a = compiler_version_str, + "AVX2"_a = use_avx2, "AVX512F"_a = use_avx512f); +} + +/** + * @brief Return basic information of runtime environment + */ +auto getRuntimeInfo() -> pybind11::dict { + using namespace Util::Constant; + using namespace pybind11::literals; + + return pybind11::dict("AVX"_a = RuntimeInfo::AVX(), + "AVX2"_a = RuntimeInfo::AVX2(), + "AVX512F"_a = RuntimeInfo::AVX512F()); +} } // namespace Pennylane diff --git a/pennylane_lightning/src/gates/KernelType.hpp b/pennylane_lightning/src/gates/KernelType.hpp index f517cc1f61..d65d65235e 100644 --- a/pennylane_lightning/src/gates/KernelType.hpp +++ b/pennylane_lightning/src/gates/KernelType.hpp @@ -27,11 +27,3 @@ namespace Pennylane::Gates { */ enum class KernelType { PI, LM, None }; } // namespace Pennylane::Gates - -namespace Pennylane { -/** - * @brief List of kernels binds to Python. 
- */ -[[maybe_unused]] constexpr std::array kernels_to_pyexport = { - Gates::KernelType::PI, Gates::KernelType::LM}; -} // namespace Pennylane diff --git a/pennylane_lightning/src/simulator/CPUMemoryModel.hpp b/pennylane_lightning/src/simulator/CPUMemoryModel.hpp index b6228401a0..282a80390f 100644 --- a/pennylane_lightning/src/simulator/CPUMemoryModel.hpp +++ b/pennylane_lightning/src/simulator/CPUMemoryModel.hpp @@ -1,4 +1,3 @@ - // Copyright 2022 Xanadu Quantum Technologies Inc. // Licensed under the Apache License, Version 2.0 (the "License"); @@ -19,6 +18,7 @@ #pragma once #include "Macros.hpp" #include "Memory.hpp" +#include "RuntimeInfo.hpp" #include #include @@ -44,11 +44,22 @@ inline auto getMemoryModel(const void *ptr) -> CPUMemoryModel { return CPUMemoryModel::Unaligned; } -constexpr inline auto bestCPUMemoryModel() -> CPUMemoryModel { +/** + * @brief Choose the best memory model to use using runtime/compile-time + * information. + */ +inline auto bestCPUMemoryModel() -> CPUMemoryModel { if constexpr (use_avx512f) { - return CPUMemoryModel::Aligned512; - } else if (use_avx2) { - return CPUMemoryModel::Aligned256; + // If the binary is compiled with AVX512F support + if (Util::RuntimeInfo::AVX512F()) { + // and the CPU support it as well + return CPUMemoryModel::Aligned512; + } + } + if constexpr (use_avx2) { + if (Util::RuntimeInfo::AVX2()) { + return CPUMemoryModel::Aligned256; + } } return CPUMemoryModel::Unaligned; } diff --git a/pennylane_lightning/src/simulator/DynamicDispatcher.hpp b/pennylane_lightning/src/simulator/DynamicDispatcher.hpp index d4cd500bd7..d232c32ce6 100644 --- a/pennylane_lightning/src/simulator/DynamicDispatcher.hpp +++ b/pennylane_lightning/src/simulator/DynamicDispatcher.hpp @@ -55,6 +55,7 @@ namespace Pennylane { */ template struct RegisterBeforeMain; +/// @cond DEV template <> struct RegisterBeforeMain { static inline const int dummy = Internal::registerAllAvailableKernels(); @@ -64,6 +65,7 @@ template <> struct 
RegisterBeforeMain { static inline const int dummy = Internal::registerAllAvailableKernels(); }; +/// @endcond /** * @brief DynamicDispatcher class diff --git a/pennylane_lightning/src/simulator/KernelMap.hpp b/pennylane_lightning/src/simulator/KernelMap.hpp index 51532b9c8c..f6c3d6632a 100644 --- a/pennylane_lightning/src/simulator/KernelMap.hpp +++ b/pennylane_lightning/src/simulator/KernelMap.hpp @@ -124,7 +124,6 @@ class PriorityDispatchSet { ordered_vec_.erase(begin, end); } }; - ///@endcond struct AllThreading {}; @@ -167,12 +166,32 @@ template class OperationKernelMap { } {} public: + /** + * @brief Get a singleton instance. + * + * return A singleton instance. + */ static auto getInstance() -> OperationKernelMap & { static OperationKernelMap instance; return instance; } + /** + * @brief Assign a kernel for a given operation, threading, and memory + * model. + * + * Variable `%priority` set the priority of the given kernel when multiple + * choices are available. The given `%interval` must be disjoint + * with all existing intervals with a given priority. + * + * @param op Operation to use as a dispatch key + * @param threading Threading option to use as a dispatch key + * @param memory_model Memory model to use as a dispatch key + * @param priority Priority of this assignment + * @param interval Range of the number of qubits to use this kernel + * @param kernel Kernel to assign + */ void assignKernelForOp(Operation op, Threading threading, CPUMemoryModel memory_model, uint32_t priority, const Util::IntegerInterval &interval, @@ -197,6 +216,10 @@ template class OperationKernelMap { set.emplace(priority, interval, kernel); } + /** + * @brief Assign kernel for given operation and memory model for all + * threading options. The priority of this assignment is 1. 
+ */ void assignKernelForOp(Operation op, [[maybe_unused]] AllThreading dummy, CPUMemoryModel memory_model, const Util::IntegerInterval &interval, @@ -207,6 +230,10 @@ template class OperationKernelMap { }); } + /** + * @brief Assign kernel for given operation and threading option for all + * memory models. The priority of this assignment is 2. + */ void assignKernelForOp(Operation op, Threading threading, [[maybe_unused]] AllMemoryModel dummy, const Util::IntegerInterval &interval, @@ -217,6 +244,10 @@ template class OperationKernelMap { }); } + /** + * @brief Assign kernel for a given operation for all memory model and all + * threading options. The priority of this assignment is 0. + */ void assignKernelForOp(Operation op, [[maybe_unused]] AllThreading dummy1, [[maybe_unused]] AllMemoryModel dummy2, const Util::IntegerInterval &interval, @@ -229,6 +260,15 @@ template class OperationKernelMap { }); } + /** + * @brief Remove an assigned kernel for the given operation, threading, + * and memory model. + * + * @param op Operation + * @param threading Threading option + * @param memory_model Memory model + * @param priority Priority to remove + */ void removeKernelForOp(Operation op, Threading threading, CPUMemoryModel memory_model, uint32_t priority) { uint32_t dispatch_key = toDispatchKey(threading, memory_model); @@ -250,11 +290,12 @@ template class OperationKernelMap { * @param num_qubits Number of qubits * @param threading Threading context * @param memory_model Memory model of the underlying data + * @return A kernel map for given keys */ [[nodiscard]] auto getKernelMap(size_t num_qubits, Threading threading, CPUMemoryModel memory_model) const -> EnumKernelMap { - // Add mutex for cache_ when we goto multithread. + // TODO: Add mutex for cache_ when we goto multithread. 
const uint32_t dispatch_key = toDispatchKey(threading, memory_model); const auto cache_iter = @@ -271,9 +312,9 @@ template class OperationKernelMap { kernel_for_op.emplace(op, set.getKernel(num_qubits)); }); if (cache_.size() == cache_size) { - cache_.pop_front(); + cache_.pop_back(); } - cache_.emplace_back(num_qubits, dispatch_key, kernel_for_op); + cache_.emplace_front(num_qubits, dispatch_key, kernel_for_op); return kernel_for_op; } return std::get<2>(*cache_iter); diff --git a/pennylane_lightning/src/simulator/StateVectorCPU.hpp b/pennylane_lightning/src/simulator/StateVectorCPU.hpp index ab5d55a800..b113ece944 100644 --- a/pennylane_lightning/src/simulator/StateVectorCPU.hpp +++ b/pennylane_lightning/src/simulator/StateVectorCPU.hpp @@ -46,6 +46,14 @@ class StateVectorCPU : public StateVectorBase { std::unordered_map kernel_for_matrices_; + /** + * @brief Internal function set kernels for all operations depending on + * provided dispatch options. + * + * @param num_qubits Number of qubits of the statevector + * @param threading Threading option + * @param memory_model Memory model + */ void setKernels(size_t num_qubits, Threading threading, CPUMemoryModel memory_model) { using KernelMap::OperationKernelMap; @@ -90,5 +98,35 @@ class StateVectorCPU : public StateVectorBase { return memory_model_; } [[nodiscard]] inline Threading threading() const { return threading_; } + + [[nodiscard]] inline auto getGateKernelMap() const & -> const + std::unordered_map & { + return kernel_for_gates_; + } + + [[nodiscard]] inline auto getGateKernelMap() + && -> std::unordered_map { + return kernel_for_gates_; + } + + [[nodiscard]] inline auto getGeneratorKernelMap() const & -> const + std::unordered_map & { + return kernel_for_generators_; + } + + [[nodiscard]] inline auto getGeneratorKernelMap() + && -> std::unordered_map { + return kernel_for_generators_; + } + + [[nodiscard]] inline auto getMatrixKernelMap() const & -> const + std::unordered_map & { + return 
kernel_for_matrices_; + } + + [[nodiscard]] inline auto getMatrixKernelMap() + && -> std::unordered_map { + return kernel_for_matrices_; + } }; } // namespace Pennylane diff --git a/pennylane_lightning/src/simulator/StateVectorManagedCPU.hpp b/pennylane_lightning/src/simulator/StateVectorManagedCPU.hpp index 9b1e5d1630..be3edd53fc 100644 --- a/pennylane_lightning/src/simulator/StateVectorManagedCPU.hpp +++ b/pennylane_lightning/src/simulator/StateVectorManagedCPU.hpp @@ -92,6 +92,17 @@ class StateVectorManagedCPU return data_.data(); } + [[nodiscard]] auto getDataVector() + -> std::vector> + & { + return data_; + } + + [[nodiscard]] auto getDataVector() const -> const + std::vector> & { + return data_; + } + /** * @brief Update data of the class to new_data * diff --git a/pennylane_lightning/src/tests/CMakeLists.txt b/pennylane_lightning/src/tests/CMakeLists.txt index b5cceb02be..35d88a5872 100644 --- a/pennylane_lightning/src/tests/CMakeLists.txt +++ b/pennylane_lightning/src/tests/CMakeLists.txt @@ -82,6 +82,7 @@ set(TEST_SOURCES CreateAllWires.cpp Test_KernelMap.cpp Test_Measures.cpp Test_OpToMemberFuncPtr.cpp + Test_RuntimeInfo.cpp Test_StateVectorCPU.cpp Test_Util.cpp Test_VectorJacobianProduct.cpp) diff --git a/pennylane_lightning/src/tests/TestAvailableKernels.hpp b/pennylane_lightning/src/tests/TestAvailableKernels.hpp index 669d98ddc8..1139abb961 100644 --- a/pennylane_lightning/src/tests/TestAvailableKernels.hpp +++ b/pennylane_lightning/src/tests/TestAvailableKernels.hpp @@ -40,23 +40,6 @@ check_kernels_are_available(const std::array &arr) -> bool { return true; } -/******************************************************************************* - * Check all kernels in kernels_to_pyexport are available - ******************************************************************************/ - -constexpr auto check_kernels_to_pyexport() -> bool { - // TODO: change to constexpr std::any_of in C++20 - // NOLINTNEXTLINE (readability-use-anyofallof) - for (const auto 
&kernel : kernels_to_pyexport) { - if (!is_available_kernel(kernel)) { - return false; - } - } - return true; -} -static_assert(check_kernels_to_pyexport(), - "Some of Kernels in Python export is not available."); - /******************************************************************************* * Check each element in kernelIdNamesPairs is unique ******************************************************************************/ diff --git a/pennylane_lightning/src/tests/Test_GateImplementations_CompareKernels.cpp b/pennylane_lightning/src/tests/Test_GateImplementations_CompareKernels.cpp index 04ff09cc17..c60b31bf48 100644 --- a/pennylane_lightning/src/tests/Test_GateImplementations_CompareKernels.cpp +++ b/pennylane_lightning/src/tests/Test_GateImplementations_CompareKernels.cpp @@ -25,6 +25,10 @@ using namespace Pennylane; using namespace Pennylane::Gates; using namespace Pennylane::Util; +namespace { +using namespace Pennylane::Gates::Constant; +} // namespace + using std::vector; template std::string kernelsToString() { @@ -32,7 +36,7 @@ template std::string kernelsToString() { return std::string(TypeList::Type::name) + ", " + kernelsToString(); } - return std::string(""); + return ""; } /* Type transformation */ @@ -105,8 +109,8 @@ void testApplyGate(RandomEngine &re, size_t num_qubits) { using Kernels = typename KernelsImplementingGate::Type; - INFO("Kernels implementing " << lookup(Constant::gate_names, gate_op) - << " are " << kernelsToString()); + INFO("Kernels implementing " << lookup(gate_names, gate_op) << " are " + << kernelsToString()); INFO("PrecisionT, ParamT = " << PrecisionToName::value << ", " << PrecisionToName::value); @@ -114,7 +118,7 @@ void testApplyGate(RandomEngine &re, size_t num_qubits) { const auto all_wires = crateAllWires(num_qubits, gate_op, true); for (const auto &wires : all_wires) { const auto params = createParams(gate_op); - const auto gate_name = lookup(Constant::gate_names, gate_op); + const auto gate_name = lookup(gate_names, 
gate_op); DYNAMIC_SECTION( "Test gate " << gate_name @@ -156,10 +160,9 @@ void testAllGatesIter(RandomEngine &re, size_t max_num_qubits) { if constexpr (gate_idx < static_cast(GateOperation::END)) { constexpr static auto gate_op = static_cast(gate_idx); - size_t min_num_qubits = - array_has_elt(Constant::multi_qubit_gates, gate_op) - ? 1 - : lookup(Constant::gate_wires, gate_op); + size_t min_num_qubits = array_has_elt(multi_qubit_gates, gate_op) + ? 1 + : lookup(gate_wires, gate_op); for (size_t num_qubits = min_num_qubits; num_qubits < max_num_qubits; num_qubits++) { testApplyGate(re, num_qubits); diff --git a/pennylane_lightning/src/tests/Test_RuntimeInfo.cpp b/pennylane_lightning/src/tests/Test_RuntimeInfo.cpp new file mode 100644 index 0000000000..93823e386b --- /dev/null +++ b/pennylane_lightning/src/tests/Test_RuntimeInfo.cpp @@ -0,0 +1,13 @@ +#include "Macros.hpp" +#include "RuntimeInfo.hpp" + +#include + +using namespace Pennylane::Util; + +TEST_CASE("Runtime information is correct", "[Test_RuntimeInfo]") { + INFO("RuntimeInfo::AVX " << RuntimeInfo::AVX()); + INFO("RuntimeInfo::AVX2 " << RuntimeInfo::AVX2()); + INFO("RuntimeInfo::AVX512F " << RuntimeInfo::AVX512F()); + REQUIRE(true); +} diff --git a/pennylane_lightning/src/tests/Test_StateVectorCPU.cpp b/pennylane_lightning/src/tests/Test_StateVectorCPU.cpp index 5b1e263de2..a812c212ea 100644 --- a/pennylane_lightning/src/tests/Test_StateVectorCPU.cpp +++ b/pennylane_lightning/src/tests/Test_StateVectorCPU.cpp @@ -35,6 +35,7 @@ TEMPLATE_TEST_CASE("StateVectorManagedCPU::StateVectorManagedCPU", REQUIRE(sv.getNumQubits() == 4); REQUIRE(sv.getLength() == 16); + REQUIRE(sv.getDataVector().size() == 16); } SECTION("StateVectorManagedCPU {const " "StateVectorRawCPU&}") { diff --git a/pennylane_lightning/src/util/BitUtil.hpp b/pennylane_lightning/src/util/BitUtil.hpp index d6996a77d8..83ffe49995 100644 --- a/pennylane_lightning/src/util/BitUtil.hpp +++ b/pennylane_lightning/src/util/BitUtil.hpp @@ -174,7 +174,8 
@@ inline auto log2PerfectPower(unsigned long val) -> size_t { constexpr auto constLog2PerfectPower(size_t value) -> size_t { if (value == 0) { - return 0; // not well defined + return 0; // not well defined. TODO: Raise an exception instead in + // a later version. } size_t n = 0; while ((value & 1U) == 0U) { diff --git a/pennylane_lightning/src/util/CMakeLists.txt b/pennylane_lightning/src/util/CMakeLists.txt index 20e75282f5..36b51f00e6 100644 --- a/pennylane_lightning/src/util/CMakeLists.txt +++ b/pennylane_lightning/src/util/CMakeLists.txt @@ -1,7 +1,11 @@ project(lightning_utils LANGUAGES CXX) set(CMAKE_CXX_STANDARD 17) -add_library(lightning_utils INTERFACE) +set(UTIL_FILES RuntimeInfo.cpp CACHE INTERNAL "" FORCE) + +add_library(lightning_utils STATIC ${UTIL_FILES}) target_include_directories(lightning_utils INTERFACE $ $ -) \ No newline at end of file +) + +set_property(TARGET lightning_utils PROPERTY POSITION_INDEPENDENT_CODE ON) diff --git a/pennylane_lightning/src/util/Macros.hpp b/pennylane_lightning/src/util/Macros.hpp index eeba364ba8..a8cb8c1d7d 100644 --- a/pennylane_lightning/src/util/Macros.hpp +++ b/pennylane_lightning/src/util/Macros.hpp @@ -13,10 +13,19 @@ // limitations under the License. /** * @file - * Define some builtin alternatives + * Define macros and compile-time constants. */ #pragma once +#include + +/** + * @brief Predefined macro variable to a string. Use std::format instead in + * C++20. 
+ */ +#define PL_TO_STR_INDIR(x) #x +#define PL_TO_STR(VAR) PL_TO_STR_INDIR(VAR) + #if defined(__GNUC__) || defined(__clang__) #define PL_UNREACHABLE __builtin_unreachable() #elif defined(_MSC_VER) @@ -90,3 +99,69 @@ #define PL_FORCE_INLINE #endif #endif + +namespace Pennylane::Util::Constant { +enum class CPUArch { AMD64, PPC64, ARM, Unknown }; + +constexpr auto getCPUArchClangGCC() { +#if defined(__x86_64__) + return CPUArch::AMD64; +#elif defined(__powerpc64__) + return CPUArch::PPC64; +#elif defined(__arm__) + return CPUArch::ARM; +#else + return CPUArch::Unknown; +#endif +} + +constexpr auto getCPUArchMSVC() { +#if defined(_M_AMD64) + return CPUArch::AMD64; +#elif defined(_M_PPC) + return CPUArch::PPC64; +#elif defined(_M_ARM) + return CPUArch::ARM; +#else + return CPUArch::Unknown; +#endif +} + +#if defined(__GNUC__) || defined(__clang__) +[[maybe_unused]] constexpr static auto cpu_arch = getCPUArchClangGCC(); +#elif defined(_MSC_VER) +[[maybe_unused]] constexpr static auto cpu_arch = getCPUArchMSVC(); +#else +[[maybe_unused]] constexpr static auto cpu_arch = CPUArch::Unknown; +#endif + +enum class Compiler { GCC, Clang, MSVC, Unknown }; + +template +constexpr auto getCompilerVersion() -> std::string_view { + return "Unknown version"; +} +template <> +constexpr auto getCompilerVersion() -> std::string_view { + return PL_TO_STR(__GNUC__) "." PL_TO_STR(__GNUC_MINOR__) "." PL_TO_STR( + __GNUC_PATCHLEVEL__); +} +template <> +constexpr auto getCompilerVersion() -> std::string_view { + return PL_TO_STR(__clang_major__) "." PL_TO_STR( + __clang_minor__) "." 
PL_TO_STR(__clang_patchlevel__); +} +template <> +constexpr auto getCompilerVersion() -> std::string_view { + return PL_TO_STR(_MSC_FULL_VER); +} +#if defined(__GNUC__) && !defined(__llvm__) && !defined(__INTEL_COMPILER) +[[maybe_unused]] constexpr static auto compiler = Compiler::GCC; +#elif defined(__clang__) +[[maybe_unused]] constexpr static auto compiler = Compiler::Clang; +#elif defined(_MSC_VER) +[[maybe_unused]] constexpr static auto compiler = Compiler::MSVC; +#else +[[maybe_unused]] constexpr static auto compiler = Compiler::Unknown; +#endif +} // namespace Pennylane::Util::Constant diff --git a/pennylane_lightning/src/util/Memory.hpp b/pennylane_lightning/src/util/Memory.hpp index cb271da036..e3779e42f7 100644 --- a/pennylane_lightning/src/util/Memory.hpp +++ b/pennylane_lightning/src/util/Memory.hpp @@ -22,17 +22,22 @@ #include "BitUtil.hpp" #include "TypeList.hpp" -/* Apple clang does not support std::aligned_alloc in Mac 10.14 */ - namespace Pennylane { /** * @brief Custom aligned allocate function. As appleclang does not support * std::aligned_alloc in Mac OS 10.14, we use posix_memalign function. * - * Note that alignment must be larger than max_align_t. + * Note that alignment must be larger than max_align_t. Otherwise, the behavior + * is undefined. + * + * @param alignment Alignment value we want for the data pointer + * @param bytes Number of bytes to allocate + * @return Memory pointer */ inline auto alignedAlloc(uint32_t alignment, size_t bytes) -> void * { -#if defined(__clang__) // probably AppleClang +#if defined(__clang__) + /* Apple clang does not support std::aligned_alloc in Mac 10.14. + * Thus we use Posix function instead. */ void *p; posix_memalign(&p, alignment, bytes); return p; @@ -43,6 +48,11 @@ inline auto alignedAlloc(uint32_t alignment, size_t bytes) -> void * { #endif } +/** + * @brief Free memory allocated by alignedAlloc. 
+ * + * @param p Pointer to the memory location allocated by aligendAlloc + */ inline void alignedFree(void *p) { #if defined(__clang__) return ::free(p); // NOLINT(hicpp-no-malloc) @@ -53,12 +63,25 @@ inline void alignedFree(void *p) { #endif } +/** + * @brief C++ Allocator class for aligned memory. + * + * @tparam T Datatype to allocate + */ template struct AlignedAllocator { uint32_t alignment_; using value_type = T; + /** + * @brief Constructor of AlignedAllocator class + * + * @param alignment Memory alignment we want. + */ constexpr explicit AlignedAllocator(uint32_t alignment) : alignment_{alignment} { + // We do not check input now as it doesn't allow the constructor to be + // a constexpr. + // TODO: Using exception is allowed in GCC>=10 // assert(Util::isPerfectPowerOf2(alignment)); } @@ -69,6 +92,12 @@ template struct AlignedAllocator { [[maybe_unused]] const AlignedAllocator &rhs) noexcept : alignment_{rhs.alignment_} {} + /** + * @brief Allocate memory with for the given number of datatype T + * + * @param size The number of T objects for the allocation + * @return Allocated aligned memory + */ [[nodiscard]] T *allocate(std::size_t size) { if (size == 0) { return nullptr; @@ -86,6 +115,12 @@ template struct AlignedAllocator { return static_cast(p); } + /** + * @brief Deallocate allocated memory + * + * @param p Pointer to the allocated data + * @param size Size of the data we allocated (unused). + */ void deallocate(T *p, [[maybe_unused]] std::size_t size) noexcept { if (alignment_ > alignof(std::max_align_t)) { alignedFree(p); @@ -103,25 +138,29 @@ template struct AlignedAllocator { } }; +/** + * @brief Compare two allocators + * + * By [the standard](https://en.cppreference.com/w/cpp/named_req/Allocator), + * two allocators are equal if the memory allocated by one can be deallocated + * by the other. 
+ */ template bool operator==([[maybe_unused]] const AlignedAllocator &lhs, [[maybe_unused]] const AlignedAllocator &rhs) { return lhs.alignment_ == rhs.alignment_; } +/** + * @brief Compare two allocators. See `%operator==` above. + */ template bool operator!=([[maybe_unused]] const AlignedAllocator &lhs, [[maybe_unused]] const AlignedAllocator &rhs) { return lhs.alignment_ != rhs.alignment_; } -/** - * @brief This function calculate the common multiplier of alignments of all - * kernels. - * - * As all alignment must be a multiple of 2, we just can choose the maximum - * alignment. - */ +///@cond DEV template struct commonAlignmentHelper { constexpr static uint32_t value = std::max(TypeList::Type::packed_bytes, @@ -130,7 +169,17 @@ template struct commonAlignmentHelper { template <> struct commonAlignmentHelper { constexpr static uint32_t value = 4U; }; +///@endcond +/** + * @brief This function calculate the common multiplier of alignments of the + * given kernels in TypeList. + * + * As all alignment must be a power of 2, we just can choose the maximum + * alignment. + * + * @tparam TypeList Type list of kernels. + */ template [[maybe_unused]] constexpr static size_t common_alignment = commonAlignmentHelper::value; diff --git a/pennylane_lightning/src/util/RuntimeInfo.cpp b/pennylane_lightning/src/util/RuntimeInfo.cpp new file mode 100644 index 0000000000..e260cc6894 --- /dev/null +++ b/pennylane_lightning/src/util/RuntimeInfo.cpp @@ -0,0 +1,68 @@ +// Copyright 2022 Xanadu Quantum Technologies Inc. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +#include "RuntimeInfo.hpp" + +#if defined(__GNUC__) || defined(__clang__) +#include +#elif defined(_MSC_VER) +#include +#endif +namespace Pennylane::Util { +#if defined(__GNUC__) || defined(__clang__) +RuntimeInfo::InternalRuntimeInfo::InternalRuntimeInfo() { + const auto nids = __get_cpuid_max(0x00, nullptr); + if (nids == 0) { + return; // cpuid is not supported + } + + unsigned int eax = 0; + unsigned int ebx = 0; + unsigned int ecx = 0; + unsigned int edx = 0; + if (nids >= 1) { + eax = 1; + __get_cpuid(1, &eax, &ebx, &ecx, &edx); + f_1_ecx = ecx; + f_1_edx = edx; + } + if (nids >= 7) { // NOLINT(readability-magic-numbers) + // NOLINTNEXTLINE(readability-magic-numbers) + __get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx); + f_7_ebx = ebx; + f_7_ecx = ecx; + } +} +#elif defined(_MSC_VER) +RuntimeInfo::InternalRuntimeInfo::InternalRuntimeInfo() { + std::array cpui; + __cpuid(cpui.data(), 0); + + nids = cpui[0]; + + if (nids >= 1) { + __cpuidex(cpui.data(), 1, 0); + f_1_ecx = cpui[2]; + f_1_edx = cpui[3] + } + + if (nids >= 7) { + __cpuidex(cpui.data(), 7, 0); + f_7_ebx = cpui[1]; + f_7_ecx = cpui[2] + } +} +#else +RuntimeInfo::InternalRuntimeInfo::InternalRuntimeInfo(){}; +#endif +} // namespace Pennylane::Util diff --git a/pennylane_lightning/src/util/RuntimeInfo.hpp b/pennylane_lightning/src/util/RuntimeInfo.hpp new file mode 100644 index 0000000000..416422bd45 --- /dev/null +++ b/pennylane_lightning/src/util/RuntimeInfo.hpp @@ -0,0 +1,52 @@ +// Copyright 2022 Xanadu Quantum Technologies Inc. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +/** + * @file + * Runtime information based on cpuid + */ +#pragma once +#include + +namespace Pennylane::Util { +/** + * @brief This class is only usable in x86 or AMD64 architecture. + */ +class RuntimeInfo { + private: + struct InternalRuntimeInfo { + InternalRuntimeInfo(); + + std::bitset<32> f_1_ecx; + std::bitset<32> f_1_edx; + std::bitset<32> f_7_ebx; + std::bitset<32> f_7_ecx; + }; + + static const inline InternalRuntimeInfo internal_runtime_info_; + + public: + static inline bool AVX() { + // NOLINTNEXTLINE(readability-magic-numbers) + return internal_runtime_info_.f_1_ecx[28]; + } + static inline bool AVX2() { + // NOLINTNEXTLINE(readability-magic-numbers) + return internal_runtime_info_.f_7_ebx[5]; + } + static inline bool AVX512F() { + // NOLINTNEXTLINE(readability-magic-numbers) + return internal_runtime_info_.f_7_ebx[16]; + } +}; +} // namespace Pennylane::Util diff --git a/pennylane_lightning/src/util/TypeList.hpp b/pennylane_lightning/src/util/TypeList.hpp index 97db820da7..a53c3cbd5d 100644 --- a/pennylane_lightning/src/util/TypeList.hpp +++ b/pennylane_lightning/src/util/TypeList.hpp @@ -41,22 +41,33 @@ template struct TypeNode { */ template using TypeList = TypeNode; +/** + * @brief Get N-th type of a type list. 
+ * + * @tparam TypeList Type list + * @tparam n The position of a type to extract + */ template struct getNth { using Type = typename getNth::Type; }; +/// @cond DEV template struct getNth { static_assert(!std::is_same_v, "The given n is larger than the length of the type list."); using Type = typename TypeList::Type; }; +/// @endcod /** - * @brief Alias + * @brief Convenient of alias of getNth */ template using getNthType = typename getNth::Type; +/** + * @brief Get the size of a type list + */ template constexpr size_t length() { if constexpr (std::is_same_v) { return 0; @@ -65,8 +76,15 @@ template constexpr size_t length() { } } +/** + * @brief Prepend a type to a type list. + * + * @tparam T Type to prepend + * @tparam U TypeList + */ template struct PrependToTypeList; +/// @cond DEV template struct PrependToTypeList> { using Type = TypeNode; @@ -74,5 +92,5 @@ struct PrependToTypeList> { template struct PrependToTypeList { using Type = TypeNode; }; - +/// @endcond } // namespace Pennylane::Util From fc54bcb88adeaa2f324f0dc5945d28986d0bde4c Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Sat, 5 Mar 2022 21:10:43 -0500 Subject: [PATCH 36/94] Fix for windows --- pennylane_lightning/src/bindings/Bindings.hpp | 4 +++- pennylane_lightning/src/util/RuntimeInfo.cpp | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pennylane_lightning/src/bindings/Bindings.hpp b/pennylane_lightning/src/bindings/Bindings.hpp index fe1bdfa4b3..2c213eff40 100644 --- a/pennylane_lightning/src/bindings/Bindings.hpp +++ b/pennylane_lightning/src/bindings/Bindings.hpp @@ -256,6 +256,8 @@ auto getCompileInfo() -> pybind11::dict { return "ARM"; default: return "Unknown"; + default: + break; } }(); @@ -267,7 +269,7 @@ auto getCompileInfo() -> pybind11::dict { return "Clang"; case Compiler::MSVC: return "MSVC"; - case Compiler::Unknown: + default: return "Unknown"; } }(); diff --git a/pennylane_lightning/src/util/RuntimeInfo.cpp 
b/pennylane_lightning/src/util/RuntimeInfo.cpp index e260cc6894..b8599d770f 100644 --- a/pennylane_lightning/src/util/RuntimeInfo.cpp +++ b/pennylane_lightning/src/util/RuntimeInfo.cpp @@ -48,7 +48,7 @@ RuntimeInfo::InternalRuntimeInfo::InternalRuntimeInfo() { std::array cpui; __cpuid(cpui.data(), 0); - nids = cpui[0]; + int nids = cpui[0]; if (nids >= 1) { __cpuidex(cpui.data(), 1, 0); From ad5c60aabf87f277b87491c2d487bbc8ec1175f2 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Sat, 5 Mar 2022 22:32:33 -0500 Subject: [PATCH 37/94] Fix --- pennylane_lightning/src/bindings/Bindings.hpp | 2 -- .../src/examples/benchmark_gate.cpp | 20 +++++++++---------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/pennylane_lightning/src/bindings/Bindings.hpp b/pennylane_lightning/src/bindings/Bindings.hpp index 2c213eff40..dc5bcbc873 100644 --- a/pennylane_lightning/src/bindings/Bindings.hpp +++ b/pennylane_lightning/src/bindings/Bindings.hpp @@ -256,8 +256,6 @@ auto getCompileInfo() -> pybind11::dict { return "ARM"; default: return "Unknown"; - default: - break; } }(); diff --git a/pennylane_lightning/src/examples/benchmark_gate.cpp b/pennylane_lightning/src/examples/benchmark_gate.cpp index 7dff1f507c..a20b92d4b9 100644 --- a/pennylane_lightning/src/examples/benchmark_gate.cpp +++ b/pennylane_lightning/src/examples/benchmark_gate.cpp @@ -19,8 +19,7 @@ using PrecisionT = double; #endif using namespace Pennylane; -using namespace Pennylane::Gates; -using namespace Pennylane::Util; +using Util::operator<<; struct GateDesc { std::string name; @@ -45,18 +44,19 @@ auto generateGateSequence(RandomEngine &re, const std::string &gate_name, const size_t num_reps, const size_t num_qubits, const size_t num_wires_for_multi_qubit) -> std::vector { - using Gates::Constant::multi_qubit_gates; + using namespace Gates::Constant; + using Gates::GateOperation; - const GateOperation gate_op = Util::lookup( - Util::reverse_pairs(Constant::gate_names), 
std::string_view(gate_name)); + const GateOperation gate_op = Util::lookup(Util::reverse_pairs(gate_names), + std::string_view(gate_name)); const size_t num_wires = [=]() { if (Util::array_has_elt(multi_qubit_gates, gate_op)) { // if multi qubit gate return num_wires_for_multi_qubit; } - return Util::lookup(Constant::gate_wires, gate_op); + return Util::lookup(gate_wires, gate_op); }(); - const size_t num_params = Util::lookup(Constant::gate_num_params, gate_op); + const size_t num_params = Util::lookup(gate_num_params, gate_op); std::vector gate_seq; std::uniform_int_distribution inverse_dist(0, 1); @@ -79,7 +79,7 @@ auto generateGateSequence(RandomEngine &re, const std::string &gate_name, return gate_seq; } -double benchmarkGate(KernelType kernel, const size_t num_qubits, +double benchmarkGate(Gates::KernelType kernel, const size_t num_qubits, const std::vector &gate_seq) { // Run benchmark. Total num_reps number of gates is used. StateVectorManagedCPU svdat{num_qubits}; @@ -97,7 +97,7 @@ double benchmarkGate(KernelType kernel, const size_t num_qubits, } template -double runBenchmarkGate(RandomEngine &re, KernelType kernel, +double runBenchmarkGate(RandomEngine &re, Gates::KernelType kernel, const std::string &gate_name, size_t num_reps, size_t num_qubits, size_t num_wires_for_multi_qubit) { auto gate_seq = generateGateSequence(re, gate_name, num_reps, num_qubits, @@ -127,7 +127,7 @@ double runBenchmarkGate(RandomEngine &re, KernelType kernel, * @return Returns 0 is completed successfully */ int main(int argc, char *argv[]) { - namespace Constant = Gates::Constant; + using namespace Pennylane::Gates; // Handle input if (argc != 5 && argc != 6) { // NOLINT(readability-magic-numbers) std::cerr From 39766bf8921a3287a4aa9bb941297e2f47f06378 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Sat, 5 Mar 2022 22:53:17 -0500 Subject: [PATCH 38/94] Fix for MSVC --- pennylane_lightning/src/util/RuntimeInfo.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff 
--git a/pennylane_lightning/src/util/RuntimeInfo.cpp b/pennylane_lightning/src/util/RuntimeInfo.cpp index b8599d770f..c6cd5ff803 100644 --- a/pennylane_lightning/src/util/RuntimeInfo.cpp +++ b/pennylane_lightning/src/util/RuntimeInfo.cpp @@ -13,11 +13,14 @@ // limitations under the License. #include "RuntimeInfo.hpp" +#include + #if defined(__GNUC__) || defined(__clang__) #include #elif defined(_MSC_VER) #include #endif + namespace Pennylane::Util { #if defined(__GNUC__) || defined(__clang__) RuntimeInfo::InternalRuntimeInfo::InternalRuntimeInfo() { @@ -59,7 +62,7 @@ RuntimeInfo::InternalRuntimeInfo::InternalRuntimeInfo() { if (nids >= 7) { __cpuidex(cpui.data(), 7, 0); f_7_ebx = cpui[1]; - f_7_ecx = cpui[2] + f_7_ecx = cpui[2]; } } #else From 34ee6586f1db79d81922d19632da9f2a3eaca823 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Sat, 5 Mar 2022 23:32:34 -0500 Subject: [PATCH 39/94] Fix benchmark plot --- .../src/examples/plot_benchmark.py | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/pennylane_lightning/src/examples/plot_benchmark.py b/pennylane_lightning/src/examples/plot_benchmark.py index 0d650071ec..329b833ead 100755 --- a/pennylane_lightning/src/examples/plot_benchmark.py +++ b/pennylane_lightning/src/examples/plot_benchmark.py @@ -1,26 +1,24 @@ #!/usr/bin/env python3 -import csv import sys import numpy as np from pathlib import Path import matplotlib.pyplot as plt import argparse +import json import re plt.rc("font", family="sans-serif") -def parse_result_csv(filepath): +def parse_result_json(filepath): n_qubits = [] times = [] - with filepath.open() as csvfile: - reader = csv.reader(csvfile) - next(reader) # ignore the first line - for row in reader: - n_qubits.append(int(row[0])) - times.append(float(row[1])) + with filepath.open() as f: + data = json.load(f) + n_qubits = [int(d["N"]) for d in data] + times = [float(d["time"]) for d in data] return n_qubits, times @@ -37,10 +35,10 @@ def 
parse_result_csv(filepath): res_dir = Path(args.path) gate_name = args.gate_name - filename_rgx = re.compile(f"^benchmark_(.*?)_{gate_name}.csv$") + filename_rgx = re.compile(f"^{gate_name}_(.*?).json$") res_files = [] - for file in res_dir.glob("*.csv"): + for file in res_dir.glob("*.json"): m = filename_rgx.match(file.name) if m is not None: res_files.append((m.group(1), file)) @@ -57,7 +55,7 @@ def parse_result_csv(filepath): total_num_qubits = set() for kernel_idx, (kernel_name, res_file) in enumerate(res_files): - n_qubits, times = parse_result_csv(res_file) + n_qubits, times = parse_result_json(res_file) total_num_qubits |= set(n_qubits) n_qubits = np.array(n_qubits, dtype=float) plt.bar(n_qubits + 0.8 * (kernel_idx - num_kernels / 2 + 1 / 2), times, label=kernel_name) From e34ca57da69054e805e7fa53929381afba6bad24 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Sat, 5 Mar 2022 23:34:16 -0500 Subject: [PATCH 40/94] Fix for MSVC --- pennylane_lightning/src/util/RuntimeInfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pennylane_lightning/src/util/RuntimeInfo.cpp b/pennylane_lightning/src/util/RuntimeInfo.cpp index c6cd5ff803..5a208cb540 100644 --- a/pennylane_lightning/src/util/RuntimeInfo.cpp +++ b/pennylane_lightning/src/util/RuntimeInfo.cpp @@ -56,7 +56,7 @@ RuntimeInfo::InternalRuntimeInfo::InternalRuntimeInfo() { if (nids >= 1) { __cpuidex(cpui.data(), 1, 0); f_1_ecx = cpui[2]; - f_1_edx = cpui[3] + f_1_edx = cpui[3]; } if (nids >= 7) { From b7f1e6e54679ccb6509b37d1f3ce3d34261f3a4c Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Sun, 6 Mar 2022 00:20:01 -0500 Subject: [PATCH 41/94] Rollback makefile --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index edef79bab5..02556dc3e0 100644 --- a/Makefile +++ b/Makefile @@ -75,7 +75,7 @@ coverage: test-cpp: rm -rf ./BuildTests - cmake $(LIGHTNING_CPP_DIR) -BBuildTests -DBUILD_TESTS=ON -DENABLE_OPENMP=OFF + cmake $(LIGHTNING_CPP_DIR) 
-BBuildTests -DBUILD_TESTS=ON cmake --build ./BuildTests --target runner cmake --build ./BuildTests --target test From f05118ebb228ddefffc83e50579e8db4851ee4c9 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Sun, 6 Mar 2022 10:16:05 -0500 Subject: [PATCH 42/94] Fix snapshot --- pennylane_lightning/lightning_qubit.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pennylane_lightning/lightning_qubit.py b/pennylane_lightning/lightning_qubit.py index 014e4acb51..c25b700c7b 100644 --- a/pennylane_lightning/lightning_qubit.py +++ b/pennylane_lightning/lightning_qubit.py @@ -195,6 +195,8 @@ def apply_lightning(self, state, operations, dtype=np.complex128): for o in operations: name = o.name.split(".")[0] # The split is because inverse gates have .inv appended + if name == 'Snapshot': + continue method = getattr(sim, name, None) wires = self.wires.indices(o.wires) From 4bd95989e21fb38819ee1b2293ba18bdb275a0d4 Mon Sep 17 00:00:00 2001 From: Dev version update bot Date: Tue, 8 Mar 2022 04:28:41 +0000 Subject: [PATCH 43/94] Auto update version --- pennylane_lightning/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pennylane_lightning/_version.py b/pennylane_lightning/_version.py index b362bf426f..9ffb524fd2 100644 --- a/pennylane_lightning/_version.py +++ b/pennylane_lightning/_version.py @@ -16,4 +16,4 @@ Version number (major.minor.patch[-label]) """ -__version__ = "0.22.0-dev13" +__version__ = "0.22.0-dev14" From 765c684a7d84d1bcbed4b10c4e1cca647774f54d Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Mon, 7 Mar 2022 23:28:51 -0500 Subject: [PATCH 44/94] Remove snapshot check in apply --- pennylane_lightning/lightning_qubit.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pennylane_lightning/lightning_qubit.py b/pennylane_lightning/lightning_qubit.py index d402d13513..9cc9277363 100644 --- a/pennylane_lightning/lightning_qubit.py +++ b/pennylane_lightning/lightning_qubit.py @@ -202,8 +202,6 @@ def apply_lightning(self, state, 
operations, dtype=np.complex128): for o in operations: name = o.name.split(".")[0] # The split is because inverse gates have .inv appended - if name == 'Snapshot': - continue method = getattr(sim, name, None) wires = self.wires.indices(o.wires) From 1c2c50ffdf1cc9e28e291975fc8b98de93209faf Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Tue, 8 Mar 2022 00:02:14 -0500 Subject: [PATCH 45/94] Update tests --- .github/workflows/tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 2ba5498498..3d647dd8d0 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -82,6 +82,9 @@ jobs: pip uninstall pennylane -y pip install git+https://github.com/PennyLaneAI/pennylane.git + - name: Install torch to test interface + run: pip install torch + - name: Install lightning.qubit device run: | cd main From b0207017d1bc244b570641ef3ad59a365de3570c Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Wed, 9 Mar 2022 13:10:56 -0500 Subject: [PATCH 46/94] Fix some functions; Documents added --- .../src/algorithms/AdjointDiff.hpp | 4 +- pennylane_lightning/src/bindings/Bindings.cpp | 7 +- pennylane_lightning/src/bindings/Bindings.hpp | 36 +++++- .../src/gates/SelectKernel.hpp | 26 ++-- .../cpu_kernels/GateImplementationsLM.hpp | 2 +- .../src/simulator/CPUMemoryModel.hpp | 22 ++++ .../src/simulator/DispatchKeys.hpp | 7 + .../src/simulator/DynamicDispatcher.hpp | 30 +++-- .../src/simulator/Measures.hpp | 3 +- .../src/simulator/StateVectorBase.hpp | 121 +++--------------- .../src/simulator/StateVectorCPU.hpp | 34 +++++ .../src/simulator/StateVectorManagedCPU.hpp | 42 +++++- .../src/simulator/StateVectorRawCPU.hpp | 3 + pennylane_lightning/src/tests/Test_Util.cpp | 10 +- pennylane_lightning/src/util/BitUtil.hpp | 37 ++++-- .../src/util/IntegerInterval.hpp | 33 ++++- pennylane_lightning/src/util/Util.hpp | 15 ++- 17 files changed, 265 insertions(+), 167 deletions(-) diff --git 
a/pennylane_lightning/src/algorithms/AdjointDiff.hpp b/pennylane_lightning/src/algorithms/AdjointDiff.hpp index d58dad3e7b..40766ac7de 100644 --- a/pennylane_lightning/src/algorithms/AdjointDiff.hpp +++ b/pennylane_lightning/src/algorithms/AdjointDiff.hpp @@ -176,7 +176,7 @@ template class AdjointJacobian { #endif for (size_t h_i = 0; h_i < num_observables; h_i++) { try { - states[h_i].updateData(reference_state.getData()); + states[h_i].updateData(reference_state.getDataVector()); applyObservable(states[h_i], observables[h_i]); } catch (...) { #if defined(_OPENMP) @@ -363,7 +363,7 @@ template class AdjointJacobian { "differentiation method"); if ((ops_name[op_idx] != "QubitStateVector") && (ops_name[op_idx] != "BasisState")) { - mu.updateData(lambda.getData()); + mu.updateData(lambda.getDataVector()); applyOperationAdj(lambda, ops, op_idx); if (ops.hasParams(op_idx)) { diff --git a/pennylane_lightning/src/bindings/Bindings.cpp b/pennylane_lightning/src/bindings/Bindings.cpp index a0d39159f4..94db3aff37 100644 --- a/pennylane_lightning/src/bindings/Bindings.cpp +++ b/pennylane_lightning/src/bindings/Bindings.cpp @@ -56,13 +56,16 @@ void lightning_class_bindings(py::module_ &m) { //***********************************************************************// // std::string class_name = "StateVectorC" + bitsize; - auto pyclass = py::class_>(m, class_name.c_str(), - py::module_local()); + auto pyclass = py::class_>( + m, class_name.c_str(), py::module_local()); pyclass.def(py::init(&createRaw)); registerGatesForStateVector>(pyclass); + pyclass.def("kernel_map", &svKernelMap, + "Get internal kernels for operations"); + //***********************************************************************// // Observable //***********************************************************************// diff --git a/pennylane_lightning/src/bindings/Bindings.hpp b/pennylane_lightning/src/bindings/Bindings.hpp index dc5bcbc873..2ff03e6cdc 100644 --- a/pennylane_lightning/src/bindings/Bindings.hpp 
+++ b/pennylane_lightning/src/bindings/Bindings.hpp @@ -114,7 +114,7 @@ auto toNumpyArray(const StateVectorManagedCPU &sv) /** * @brief Get memory alignment of a given numpy array. * - * @param NumpyArray Pybind11's numpy array type. + * @param numpyArray Pybind11's numpy array type. * @return Memory model describing alignment */ auto getNumpyArrayAlignment(const pybind11::array &numpyArray) @@ -239,6 +239,40 @@ void registerGatesForStateVector(PyClass &pyclass) { }); } +/** + * @brief Get a gate kernel map for a statevector + */ +template +auto svKernelMap(const StateVectorRawCPU &sv) -> pybind11::dict { + pybind11::dict res_map; + namespace Constant = Gates::Constant; + + for (const auto &[gate_op, kernel] : sv.getGateKernelMap()) { + const auto key = + std::string(Util::lookup(Constant::gate_names, gate_op)); + const auto value = Util::lookup(Gates::kernel_id_name_pairs, kernel); + + res_map[key.c_str()] = value; + } + + for (const auto &[gntr_op, kernel] : sv.getGeneratorKernelMap()) { + const auto key = + std::string(Util::lookup(Constant::generator_names, gntr_op)); + const auto value = Util::lookup(Gates::kernel_id_name_pairs, kernel); + + res_map[key.c_str()] = value; + } + + for (const auto &[mat_op, kernel] : sv.getMatrixKernelMap()) { + const auto key = + std::string(Util::lookup(Constant::matrix_names, mat_op)); + const auto value = Util::lookup(Gates::kernel_id_name_pairs, kernel); + + res_map[key.c_str()] = value; + } + return res_map; +} + /** * @brief Return basic information of the compiled binary. */ diff --git a/pennylane_lightning/src/gates/SelectKernel.hpp b/pennylane_lightning/src/gates/SelectKernel.hpp index 5057ed9b42..a43428ddda 100644 --- a/pennylane_lightning/src/gates/SelectKernel.hpp +++ b/pennylane_lightning/src/gates/SelectKernel.hpp @@ -35,31 +35,23 @@ namespace Pennylane::Gates { * As Util::lookup can be used in constexpr context, this function is redundant * (by the standard). 
But GCC 9 still does not accept Util::lookup in constexpr * some cases. + * + * @tparam e Enum value + * @tparam T Value type of array + * @tparam size Size of the array + * + * @param arr Array of key, value pairs */ -///@{ -template -constexpr auto -static_lookup(const std::array, size> &arr) -> T { - for (size_t idx = 0; idx < size; idx++) { - if (std::get<0>(arr[idx]) == op) { - return std::get<1>(arr[idx]); - } - } - return T{}; -} - -template +template constexpr auto -static_lookup(const std::array, size> &arr) - -> T { +static_lookup(const std::array, size> &arr) -> T { for (size_t idx = 0; idx < size; idx++) { - if (std::get<0>(arr[idx]) == op) { + if (std::get<0>(arr[idx]) == e) { return std::get<1>(arr[idx]); } } return T{}; } -///@} /// @cond DEV namespace Internal { diff --git a/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsLM.hpp b/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsLM.hpp index 87fb7469a8..1ecb8d7fbe 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsLM.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsLM.hpp @@ -41,7 +41,7 @@ class GateImplementationsLM : public PauliGenerator { private: /* Alias utility functions */ static constexpr auto fillLeadingOnes = Util::fillLeadingOnes; - static constexpr auto fillTrailingOnes = Util::fillTrailingOnes; + static constexpr auto fillTrailingOnes = Util::fillTrailingOnes; static constexpr auto bitswap = Util::bitswap; public: diff --git a/pennylane_lightning/src/simulator/CPUMemoryModel.hpp b/pennylane_lightning/src/simulator/CPUMemoryModel.hpp index 282a80390f..09debc9804 100644 --- a/pennylane_lightning/src/simulator/CPUMemoryModel.hpp +++ b/pennylane_lightning/src/simulator/CPUMemoryModel.hpp @@ -24,6 +24,10 @@ #include namespace Pennylane { + +/** + * @brief Enum class for defining CPU memory alignments + */ enum class CPUMemoryModel : uint8_t { Unaligned, Aligned256, @@ -32,6 +36,12 @@ enum class CPUMemoryModel : uint8_t { 
BEGIN = Unaligned, }; +/** + * @brief Compute alignment of a given data pointer + * + * @param ptr Pointer to data + * @return CPUMemoryModel + */ inline auto getMemoryModel(const void *ptr) -> CPUMemoryModel { if ((reinterpret_cast(ptr) % 64) == 0) { return CPUMemoryModel::Aligned512; @@ -47,6 +57,8 @@ inline auto getMemoryModel(const void *ptr) -> CPUMemoryModel { /** * @brief Choose the best memory model to use using runtime/compile-time * information. + * + * @return CPUMemoryModel */ inline auto bestCPUMemoryModel() -> CPUMemoryModel { if constexpr (use_avx512f) { @@ -64,6 +76,11 @@ inline auto bestCPUMemoryModel() -> CPUMemoryModel { return CPUMemoryModel::Unaligned; } +/** + * @brief Return alignment of a given memory model. + * + * @tparam T Data type + */ template constexpr inline auto getAlignment(CPUMemoryModel memory_model) -> uint32_t { switch (memory_model) { @@ -79,6 +96,11 @@ constexpr inline auto getAlignment(CPUMemoryModel memory_model) -> uint32_t { PL_UNREACHABLE; } +/** + * @brief Get a corresponding allocator for standard library containers. + * + * @tparam T Data type + */ template constexpr auto getAllocator(CPUMemoryModel memory_model) -> AlignedAllocator { diff --git a/pennylane_lightning/src/simulator/DispatchKeys.hpp b/pennylane_lightning/src/simulator/DispatchKeys.hpp index a6d2f4ba94..34431c3ca6 100644 --- a/pennylane_lightning/src/simulator/DispatchKeys.hpp +++ b/pennylane_lightning/src/simulator/DispatchKeys.hpp @@ -34,6 +34,11 @@ enum class Threading : uint8_t { BEGIN = SingleThread, }; +/** + * @brief Compute dispatch key using threading and memory information. + * + * @return Dispatch key + */ constexpr uint32_t toDispatchKey(Threading threading, CPUMemoryModel memory_model) { /* Threading is in higher priority */ @@ -43,6 +48,8 @@ constexpr uint32_t toDispatchKey(Threading threading, /** * @brief Choose the best threading based on the current context. 
+ * + * @return Threading */ inline auto bestThreading() -> Threading { #ifdef PL_USE_OMP diff --git a/pennylane_lightning/src/simulator/DynamicDispatcher.hpp b/pennylane_lightning/src/simulator/DynamicDispatcher.hpp index d232c32ce6..0929528325 100644 --- a/pennylane_lightning/src/simulator/DynamicDispatcher.hpp +++ b/pennylane_lightning/src/simulator/DynamicDispatcher.hpp @@ -208,7 +208,7 @@ template class DynamicDispatcher { * @param kernel Kernel to run the gate operation. * @param data Pointer to data. * @param num_qubits Number of qubits. - * @param op_name Gate operation name. + * @param gate_op Gate operation. * @param wires Wires to apply gate to. * @param inverse Indicates whether to use inverse of gate. * @param params Optional parameter list for parametric gates. @@ -292,17 +292,29 @@ template class DynamicDispatcher { * @param wires Wires the gate applies to. * @param inverse Indicate whether inverse should be taken. */ - void applyMatrix(Gates::KernelType kernel, CFP_t *data, - Gates::MatrixOperation mat_op, size_t num_qubits, + void applyMatrix(Gates::KernelType kernel, CFP_t *data, size_t num_qubits, const std::complex *matrix, const std::vector &wires, bool inverse) const { + using Gates::MatrixOperation; assert(num_qubits >= wires.size()); - const auto iter = matrices_.find(std::make_pair(mat_op, kernel)); + const auto iter = [n_wires = wires.size(), kernel, this]() { + switch (n_wires) { + case 1: + return matrices_.find( + std::make_pair(MatrixOperation::SingleQubitOp, kernel)); + case 2: + return matrices_.find( + std::make_pair(MatrixOperation::TwoQubitOp, kernel)); + default: + return matrices_.find( + std::make_pair(MatrixOperation::MultiQubitOp, kernel)); + } + }(); if (iter == matrices_.end()) { throw std::invalid_argument( - std::string( - Util::lookup(Gates::Constant::matrix_names, mat_op)) + + std::string(Util::lookup(Gates::Constant::matrix_names, + (iter->first).first)) + " is not registered for the given kernel"); } 
(iter->second)(data, num_qubits, matrix, wires, inverse); @@ -317,8 +329,7 @@ template class DynamicDispatcher { * @param wires Wires the gate applies to. * @param inverse Indicate whether inverse should be taken. */ - void applyMatrix(Gates::KernelType kernel, CFP_t *data, - Gates::MatrixOperation mat_op, size_t num_qubits, + void applyMatrix(Gates::KernelType kernel, CFP_t *data, size_t num_qubits, const std::complex &matrix, const std::vector &wires, bool inverse) const { if (matrix.size() != Util::exp2(2 * wires.size())) { @@ -326,8 +337,7 @@ template class DynamicDispatcher { "The size of matrix does not match with the given " "number of wires"); } - applyMatrix(kernel, data, mat_op, num_qubits, matrix.data(), wires, - inverse); + applyMatrix(kernel, data, num_qubits, matrix.data(), wires, inverse); } /** diff --git a/pennylane_lightning/src/simulator/Measures.hpp b/pennylane_lightning/src/simulator/Measures.hpp index c158f7ed37..14e44fa4dc 100644 --- a/pennylane_lightning/src/simulator/Measures.hpp +++ b/pennylane_lightning/src/simulator/Measures.hpp @@ -77,8 +77,7 @@ class Measures { std::vector probs(const std::vector &wires) { // Determining index that would sort the vector. // This information is needed later. - const std::vector sorted_ind_wires( - Util::sorting_indices(wires)); + const auto sorted_ind_wires = Util::sorting_indices(wires); // Sorting wires. 
std::vector sorted_wires(wires.size()); for (size_t pos = 0; pos < wires.size(); pos++) { diff --git a/pennylane_lightning/src/simulator/StateVectorBase.hpp b/pennylane_lightning/src/simulator/StateVectorBase.hpp index 1b48512164..2eda32fc5b 100644 --- a/pennylane_lightning/src/simulator/StateVectorBase.hpp +++ b/pennylane_lightning/src/simulator/StateVectorBase.hpp @@ -278,76 +278,12 @@ template class StateVectorBase { const std::vector &wires, bool adj = false) -> PrecisionT { auto *arr = getData(); - auto &dispatcher = DynamicDispatcher::getInstance(); + const auto &dispatcher = DynamicDispatcher::getInstance(); return dispatcher.applyGenerator( getKernelForGenerator(dispatcher.strToGeneratorOp(opName)), arr, num_qubits_, opName, wires, adj); } - /** - * @brief Apply a general single qubit matrix to given wires. - * - * @param kernel Kernel to run the operation - * @param matrix Pointer to the array data. - * @param wires Wires to apply gate to. - * @param inverse Indicate whether inverse should be taken. - */ - inline void applySingleQubitOp(Gates::KernelType kernel, - const ComplexPrecisionT *matrix, - const std::vector &wires, - bool inverse = false) { - using Gates::MatrixOperation; - - assert(wires.size() == 1); - - auto &dispatcher = DynamicDispatcher::getInstance(); - auto *arr = getData(); - dispatcher.applyMatrix(kernel, arr, MatrixOperation::SingleQubitOp, - num_qubits_, matrix, wires, inverse); - } - - /** - * @brief Apply a general single qubit matrix to given wires. - * - * @param kernel Kernel to run the operation - * @param matrix Pointer to the array data. - * @param wires Wires to apply gate to. - * @param inverse Indicate whether inverse should be taken. 
- */ - inline void applyTwoQubitOp(Gates::KernelType kernel, - const ComplexPrecisionT *matrix, - const std::vector &wires, - bool inverse = false) { - using Gates::MatrixOperation; - - assert(wires.size() == 2); - - auto &dispatcher = DynamicDispatcher::getInstance(); - auto *arr = getData(); - dispatcher.applyMatrix(kernel, arr, MatrixOperation::TwoQubitOp, - num_qubits_, matrix, wires, inverse); - } - - /** - * @brief Apply a general multi qubit matrix to given wires. - * - * @param kernel Kernel to run the operation - * @param matrix Pointer to the array data. - * @param wires Wires to apply gate to. - * @param inverse Indicate whether inverse should be taken. - */ - inline void applyMultiQubitOp(Gates::KernelType kernel, - const ComplexPrecisionT *matrix, - const std::vector &wires, - bool inverse = false) { - using Gates::MatrixOperation; - - auto &dispatcher = DynamicDispatcher::getInstance(); - auto *arr = getData(); - dispatcher.applyMatrix(kernel, arr, MatrixOperation::MultiQubitOp, - num_qubits_, matrix, wires, inverse); - } - /** * @brief Apply a given matrix directly to the statevector read directly * from numpy data. Data can be in 1D or 2D format. 
@@ -363,7 +299,7 @@ template class StateVectorBase { bool inverse = false) { using Gates::MatrixOperation; - auto &dispatcher = DynamicDispatcher::getInstance(); + const auto &dispatcher = DynamicDispatcher::getInstance(); auto *arr = getData(); if (wires.empty()) { @@ -371,21 +307,8 @@ template class StateVectorBase { "Number of wires must be larger than 0"); } - switch (wires.size()) { - case 1: - dispatcher.applyMatrix(kernel, arr, MatrixOperation::SingleQubitOp, - num_qubits_, matrix, wires, inverse); - return; - case 2: - dispatcher.applyMatrix(kernel, arr, MatrixOperation::TwoQubitOp, - num_qubits_, matrix, wires, inverse); - return; - default: - dispatcher.applyMatrix(kernel, arr, MatrixOperation::MultiQubitOp, - num_qubits_, matrix, wires, inverse); - return; - } - PL_UNREACHABLE; + dispatcher.applyMatrix(kernel, arr, num_qubits_, matrix, wires, + inverse); } /** @@ -401,35 +324,22 @@ template class StateVectorBase { bool inverse = false) { using Gates::MatrixOperation; - auto &dispatcher = DynamicDispatcher::getInstance(); - auto *arr = getData(); - if (wires.empty()) { throw std::invalid_argument( "Number of wires must be larger than 0"); } - switch (wires.size()) { - case 1: - dispatcher.applyMatrix( - getKernelForMatrix(MatrixOperation::SingleQubitOp), arr, - MatrixOperation::SingleQubitOp, num_qubits_, matrix, wires, - inverse); - return; - case 2: - dispatcher.applyMatrix( - getKernelForMatrix(MatrixOperation::TwoQubitOp), arr, - MatrixOperation::TwoQubitOp, num_qubits_, matrix, wires, - inverse); - return; - default: - dispatcher.applyMatrix( - getKernelForMatrix(MatrixOperation::MultiQubitOp), arr, - MatrixOperation::MultiQubitOp, num_qubits_, matrix, wires, - inverse); - return; - } - PL_UNREACHABLE; + const auto kernel = [n_wires = wires.size(), this]() { + switch (n_wires) { + case 1: + return getKernelForMatrix(MatrixOperation::SingleQubitOp); + case 2: + return getKernelForMatrix(MatrixOperation::TwoQubitOp); + default: + return 
getKernelForMatrix(MatrixOperation::MultiQubitOp); + } + }(); + applyMatrix(kernel, matrix, wires, inverse); } template @@ -663,5 +573,4 @@ inline auto operator<<(std::ostream &out, const StateVectorBase &sv) return out; } - } // namespace Pennylane diff --git a/pennylane_lightning/src/simulator/StateVectorCPU.hpp b/pennylane_lightning/src/simulator/StateVectorCPU.hpp index b113ece944..c815f1f9ad 100644 --- a/pennylane_lightning/src/simulator/StateVectorCPU.hpp +++ b/pennylane_lightning/src/simulator/StateVectorCPU.hpp @@ -77,28 +77,56 @@ class StateVectorCPU : public StateVectorBase { } public: + /** + * @brief Get a kernel for a gate operation. + * + * @param gate_op Gate operation + * @return KernelType + */ [[nodiscard]] inline auto getKernelForGate(Gates::GateOperation gate_op) const -> Gates::KernelType { return kernel_for_gates_.at(gate_op); } + /** + * @brief Get a kernel for a gate operation. + * + * @param gntr_op Generator operation + * @return KernelType + */ [[nodiscard]] inline auto getKernelForGenerator(Gates::GeneratorOperation gntr_op) const -> Gates::KernelType { return kernel_for_generators_.at(gntr_op); } + /** + * @brief Get a kernel for a gate operation. + * + * @param mat_op Matrix operation + * @return KernelType + */ [[nodiscard]] inline auto getKernelForMatrix(Gates::MatrixOperation mat_op) const -> Gates::KernelType { return kernel_for_matrices_.at(mat_op); } + /** + * @brief Get memory model of the statevector + */ [[nodiscard]] inline CPUMemoryModel memoryModel() const { return memory_model_; } + + /** + * @brief Get threading of the statevector + */ [[nodiscard]] inline Threading threading() const { return threading_; } + /** + * @brief Get kernels for all gate operations. 
+ */ [[nodiscard]] inline auto getGateKernelMap() const & -> const std::unordered_map & { return kernel_for_gates_; @@ -109,6 +137,9 @@ class StateVectorCPU : public StateVectorBase { return kernel_for_gates_; } + /** + * @brief Get kernels for all generator operations. + */ [[nodiscard]] inline auto getGeneratorKernelMap() const & -> const std::unordered_map & { return kernel_for_generators_; @@ -119,6 +150,9 @@ class StateVectorCPU : public StateVectorBase { return kernel_for_generators_; } + /** + * @brief Get kernels for all matrix operations. + */ [[nodiscard]] inline auto getMatrixKernelMap() const & -> const std::unordered_map & { return kernel_for_matrices_; diff --git a/pennylane_lightning/src/simulator/StateVectorManagedCPU.hpp b/pennylane_lightning/src/simulator/StateVectorManagedCPU.hpp index be3edd53fc..df626a87a3 100644 --- a/pennylane_lightning/src/simulator/StateVectorManagedCPU.hpp +++ b/pennylane_lightning/src/simulator/StateVectorManagedCPU.hpp @@ -40,6 +40,13 @@ class StateVectorManagedCPU std::vector> data_; public: + /** + * @brief Create a new statevector + * + * @param num_qubits Number of qubits + * @param threading Threading option the statevector to use + * @param memory_model Memory model the statevector will use + */ explicit StateVectorManagedCPU( size_t num_qubits, Threading threading = bestThreading(), CPUMemoryModel memory_model = bestCPUMemoryModel()) @@ -49,6 +56,11 @@ class StateVectorManagedCPU data_[0] = {1, 0}; } + /** + * @brief Construct a statevector from another statevector + * + * @param other Another statevector to construct the statevector from + */ template explicit StateVectorManagedCPU( const StateVectorCPU &other) @@ -57,6 +69,14 @@ class StateVectorManagedCPU data_{other.getData(), other.getData() + other.getLength(), getAllocator(this->memory_model_)} {} + /** + * @brief Construct a statevector from data pointer + * + * @param other_data Data pointer to construct the statvector from. 
+ * @param other_size Size of the data + * @param threading Threading option the statevector to use + * @param memory_model Memory model the statevector will use + */ StateVectorManagedCPU(const ComplexPrecisionT *other_data, size_t other_size, Threading threading = bestThreading(), @@ -68,13 +88,19 @@ class StateVectorManagedCPU "The size of provided data must be a power of 2."); } - // Clang-tidy gives false positive for delegating constructor + /** + * @brief Construct a statevector from a data vector + * + * @param other Data to construct the statevector from + * @param threading Threading option the statevector to use + * @param memory_model Memory model the statevector will use + */ template explicit StateVectorManagedCPU( - const std::vector, Alloc> &rhs, + const std::vector, Alloc> &other, Threading threading = bestThreading(), CPUMemoryModel memory_model = bestCPUMemoryModel()) - : StateVectorManagedCPU(rhs.data(), rhs.size(), threading, + : StateVectorManagedCPU(other.data(), other.size(), threading, memory_model) {} StateVectorManagedCPU(const StateVectorManagedCPU &rhs) = default; @@ -92,6 +118,9 @@ class StateVectorManagedCPU return data_.data(); } + /** + * @brief Get underlying data vector + */ [[nodiscard]] auto getDataVector() -> std::vector> & { @@ -108,8 +137,11 @@ class StateVectorManagedCPU * * @param new_data std::vector contains data. 
*/ - void updateData(const ComplexPrecisionT *data) { - std::copy(data, data + BaseType::getLength(), data_.data()); + template + void updateData(const std::vector &new_data) { + assert(data_.size() == new_data.size()); + std::copy(new_data.data(), new_data.data() + new_data.size(), + data_.data()); } }; } // namespace Pennylane diff --git a/pennylane_lightning/src/simulator/StateVectorRawCPU.hpp b/pennylane_lightning/src/simulator/StateVectorRawCPU.hpp index 57c0775774..92de97be20 100644 --- a/pennylane_lightning/src/simulator/StateVectorRawCPU.hpp +++ b/pennylane_lightning/src/simulator/StateVectorRawCPU.hpp @@ -58,8 +58,11 @@ class StateVectorRawCPU /** * @brief Construct state-vector from a raw data pointer. * + * Memory model is automatically deduced from a pointer. + * * @param data Raw data pointer. * @param length The size of the data, i.e. 2^(number of qubits). + * @param threading Threading option the statevector to use */ StateVectorRawCPU(ComplexPrecisionT *data, size_t length, Threading threading = bestThreading()) diff --git a/pennylane_lightning/src/tests/Test_Util.cpp b/pennylane_lightning/src/tests/Test_Util.cpp index adebf08c66..a01117ed98 100644 --- a/pennylane_lightning/src/tests/Test_Util.cpp +++ b/pennylane_lightning/src/tests/Test_Util.cpp @@ -575,11 +575,11 @@ TEST_CASE("Utility bit operations", "[Util][BitUtil]") { CHECK(Util::bitswap(0B001101, 0, 4) == 0B011100); } - SECTION("fillOnes") { - CHECK(Util::fillOnes(4) == 0B1111); - CHECK(Util::fillOnes(6) == 0B111111); - CHECK(Util::fillOnes(17) == 0B1'1111'1111'1111'1111); - CHECK(Util::fillOnes(54) == + SECTION("fillTrailingOnes") { + CHECK(Util::fillTrailingOnes(4) == 0B1111); + CHECK(Util::fillTrailingOnes(6) == 0B111111); + CHECK(Util::fillTrailingOnes(17) == 0B1'1111'1111'1111'1111); + CHECK(Util::fillTrailingOnes(54) == 0x3F'FFFF'FFFF'FFFF); // 54 == 4*13 + 2 } } diff --git a/pennylane_lightning/src/util/BitUtil.hpp b/pennylane_lightning/src/util/BitUtil.hpp index 83ffe49995..98ea5e2223 
100644 --- a/pennylane_lightning/src/util/BitUtil.hpp +++ b/pennylane_lightning/src/util/BitUtil.hpp @@ -172,6 +172,11 @@ inline auto log2PerfectPower(unsigned long val) -> size_t { #endif ///@} +/** + * @brief Compute log2 of value in a compile-time. + * + * @param value Number to compute log2 + */ constexpr auto constLog2PerfectPower(size_t value) -> size_t { if (value == 0) { return 0; // not well defined. TODO: Raise an exception instead in @@ -185,6 +190,24 @@ constexpr auto constLog2PerfectPower(size_t value) -> size_t { return n; } +/** + * @brief Fill ones from LSB to nbits. Runnable in a compile-time and for any + * integer type. + * + * @tparam IntegerType Integer type to use + * @param nbits Number of bits to fill + */ +template +inline auto constexpr fillTrailingOnes(size_t nbits) -> IntegerType { + static_assert(std::is_integral_v && + std::is_unsigned_v); + + return (nbits == 0) ? 0 + : static_cast(~IntegerType(0)) >> + static_cast( + CHAR_BIT * sizeof(IntegerType) - nbits); +} + /** * @brief Check if there is a positive integer n such that value == 2^n. * @@ -194,12 +217,6 @@ constexpr auto constLog2PerfectPower(size_t value) -> size_t { inline auto isPerfectPowerOf2(size_t value) -> bool { return popcount(value) == 1; } -/** - * @brief Fill ones from LSB to rev_wire - */ -inline auto constexpr fillTrailingOnes(size_t pos) -> size_t { - return (pos == 0) ? 
0 : (~size_t(0) >> (CHAR_BIT * sizeof(size_t) - pos)); -} /** * @brief Fill ones from MSB to pos */ @@ -216,12 +233,4 @@ inline auto constexpr bitswap(size_t bits, const size_t i, const size_t j) return bits ^ ((x << i) | (x << j)); } -template -inline auto constexpr fillOnes(size_t nbits) -> IntegerType { - static_assert(std::is_integral_v && - std::is_unsigned_v); - - return static_cast(~IntegerType(0)) >> - static_cast(CHAR_BIT * sizeof(IntegerType) - nbits); -} } // namespace Pennylane::Util diff --git a/pennylane_lightning/src/util/IntegerInterval.hpp b/pennylane_lightning/src/util/IntegerInterval.hpp index 06002ca3b0..565bf9de90 100644 --- a/pennylane_lightning/src/util/IntegerInterval.hpp +++ b/pennylane_lightning/src/util/IntegerInterval.hpp @@ -46,35 +46,61 @@ template class IntegerInterval { [[nodiscard]] IntegerType max() const { return max_; } }; +/** + * @brief Create integer interval (from, inf) + */ template auto larger_than(IntegerType from) -> IntegerInterval { return IntegerInterval{ from + 1, std::numeric_limits::max()}; } +/** + * @brief Create integer interval [from, inf) + */ template auto larger_than_equal_to(IntegerType from) -> IntegerInterval { return IntegerInterval{ from, std::numeric_limits::max()}; } +/** + * @brief Create integer interval [0, to) + */ template auto less_than(IntegerType to) -> IntegerInterval { return IntegerInterval{0, to}; } +/** + * @brief Create integer interval [0, to] + */ template auto less_than_equal_to(IntegerType to) -> IntegerInterval { return IntegerInterval{0, to + 1}; } + +/** + * @brief Create integer interval [from, to] + */ template auto in_between_closed(IntegerType from, IntegerType to) -> IntegerInterval { return IntegerInterval{from, to + 1}; } + +/** + * @brief Create integer interval [0, inf) + */ template constexpr auto full_domain() -> IntegerInterval { return IntegerInterval{ 0, std::numeric_limits::max()}; } +/** + * @brief + * @rst + * Test if :math:`I_1 \cap I_2 = \phi`. 
+ * @endrst + */ template bool is_disjoint(const IntegerInterval &interval1, const IntegerInterval &interval2) { @@ -82,6 +108,12 @@ bool is_disjoint(const IntegerInterval &interval1, (interval2.max() <= interval1.min()); } +/** + * @brief + * @rst + * Create :math:`I_1 \cup I_2` + * @endrst + */ template auto union_interval(const IntegerInterval &interval1, const IntegerInterval &interval2) @@ -90,5 +122,4 @@ auto union_interval(const IntegerInterval &interval1, std::min(interval1.min(), interval2.min()), std::max(interval1.max(), interval2.max())}; } - } // namespace Pennylane::Util diff --git a/pennylane_lightning/src/util/Util.hpp b/pennylane_lightning/src/util/Util.hpp index 2da023e2ea..a906c6fec2 100644 --- a/pennylane_lightning/src/util/Util.hpp +++ b/pennylane_lightning/src/util/Util.hpp @@ -260,10 +260,11 @@ auto linspace(T start, T end, size_t num_points) -> std::vector { * * @tparam T Vector data type. * @param arr Array to be inspected. + * @param length Size of the array * @return a vector with indices that would sort the array. 
*/ template -inline auto sorting_indices(const T &arr, size_t length) +inline auto sorting_indices(const T *arr, size_t length) -> std::vector { std::vector indices(length); iota(indices.begin(), indices.end(), 0); @@ -436,15 +437,27 @@ template void for_each_enum(Func &&func) { } } +/** + * @brief Get common alignment of given kernels + * + * @tparam PrecisionT Floating point type + * @tparam TypeList Type list of kernels to calculate common alignment + */ template struct common_alignment { constexpr static size_t value = std::max(TypeList::Type::template required_alignment, common_alignment::value); }; + +/// @cond DEV template struct common_alignment { constexpr static size_t value = std::alignment_of_v; }; +/// @endcond +/** + * @brief A value alias for common_alignment + */ template [[maybe_unused]] constexpr static size_t common_alignment_v = common_alignment::value; From cef939ce4af2d0fc269d7144e16000a86569a0c6 Mon Sep 17 00:00:00 2001 From: Dev version update bot Date: Wed, 9 Mar 2022 18:13:59 +0000 Subject: [PATCH 47/94] Auto update version --- pennylane_lightning/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pennylane_lightning/_version.py b/pennylane_lightning/_version.py index fd08943a9b..7b13995518 100644 --- a/pennylane_lightning/_version.py +++ b/pennylane_lightning/_version.py @@ -16,4 +16,4 @@ Version number (major.minor.patch[-label]) """ -__version__ = "0.22.0-dev16" +__version__ = "0.22.0-dev17" From ac8ef319a941d54cf43eb13f227a16b17d8bde24 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Wed, 9 Mar 2022 13:14:41 -0500 Subject: [PATCH 48/94] Trigger CI From bbd5840b5d024063f3f9578f7063d985ed5a6393 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Wed, 9 Mar 2022 13:21:42 -0500 Subject: [PATCH 49/94] Fix namespace --- pennylane_lightning/src/tests/Test_Util.cpp | 1 + pennylane_lightning/src/util/LinearAlgebra.hpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git 
a/pennylane_lightning/src/tests/Test_Util.cpp b/pennylane_lightning/src/tests/Test_Util.cpp index a01117ed98..8d1cd15018 100644 --- a/pennylane_lightning/src/tests/Test_Util.cpp +++ b/pennylane_lightning/src/tests/Test_Util.cpp @@ -14,6 +14,7 @@ #include "TestHelpers.hpp" using namespace Pennylane; +using namespace Pennylane::Util; /** * @brief This tests the compile-time calculation of a given scalar diff --git a/pennylane_lightning/src/util/LinearAlgebra.hpp b/pennylane_lightning/src/util/LinearAlgebra.hpp index f6cca5f397..1cf36e7be9 100644 --- a/pennylane_lightning/src/util/LinearAlgebra.hpp +++ b/pennylane_lightning/src/util/LinearAlgebra.hpp @@ -50,13 +50,13 @@ using CBLAS_LAYOUT = enum CBLAS_LAYOUT { /// @endcond // +namespace Pennylane::Util { enum class Trans : int { NoTranspose = CblasNoTrans, Transpose = CblasTrans, Adjoint = CblasConjTrans }; -namespace Pennylane::Util { /** * @brief Calculates the inner-product using OpenMP. * From 13009dec9d4b2b9ed1c60a7dfe40fd02af286d3f Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Wed, 9 Mar 2022 13:35:04 -0500 Subject: [PATCH 50/94] Fix namespace --- doc/add_kernel.rst | 6 ++++ .../src/simulator/CPUMemoryModel.hpp | 4 +-- .../src/simulator/StateVectorManagedCPU.hpp | 12 ++++---- pennylane_lightning/src/tests/TestHelpers.hpp | 5 ++-- pennylane_lightning/src/util/Memory.hpp | 30 +++++++++---------- pennylane_lightning/src/util/Util.hpp | 26 ---------------- 6 files changed, 33 insertions(+), 50 deletions(-) diff --git a/doc/add_kernel.rst b/doc/add_kernel.rst index 3c365fe62f..821f99a64c 100644 --- a/doc/add_kernel.rst +++ b/doc/add_kernel.rst @@ -16,6 +16,12 @@ We discuss how one can add another gate implementation in this document. 
Assume constexpr static kernel_id = KernelType::Mykernel; // Will be discussed below constexpr static std::string_view = "MyGateImpl"; // Name of your kernel + template + constexpr static size_t required_alignment = + std::alignment_of_v; + template + constexpr static size_t packed_bytes = sizeof(PrecisionT); + template static void applyPauliX(std::complex* data, size_t num_qubits, diff --git a/pennylane_lightning/src/simulator/CPUMemoryModel.hpp b/pennylane_lightning/src/simulator/CPUMemoryModel.hpp index 09debc9804..c7bf4f42bc 100644 --- a/pennylane_lightning/src/simulator/CPUMemoryModel.hpp +++ b/pennylane_lightning/src/simulator/CPUMemoryModel.hpp @@ -103,7 +103,7 @@ constexpr inline auto getAlignment(CPUMemoryModel memory_model) -> uint32_t { */ template constexpr auto getAllocator(CPUMemoryModel memory_model) - -> AlignedAllocator { - return AlignedAllocator{getAlignment(memory_model)}; + -> Util::AlignedAllocator { + return Util::AlignedAllocator{getAlignment(memory_model)}; } } // namespace Pennylane diff --git a/pennylane_lightning/src/simulator/StateVectorManagedCPU.hpp b/pennylane_lightning/src/simulator/StateVectorManagedCPU.hpp index df626a87a3..35e93478e1 100644 --- a/pennylane_lightning/src/simulator/StateVectorManagedCPU.hpp +++ b/pennylane_lightning/src/simulator/StateVectorManagedCPU.hpp @@ -37,7 +37,8 @@ class StateVectorManagedCPU private: using BaseType = StateVectorCPU; - std::vector> data_; + std::vector> + data_; public: /** @@ -122,13 +123,14 @@ class StateVectorManagedCPU * @brief Get underlying data vector */ [[nodiscard]] auto getDataVector() - -> std::vector> - & { + -> std::vector> & { return data_; } - [[nodiscard]] auto getDataVector() const -> const - std::vector> & { + [[nodiscard]] auto getDataVector() const + -> const std::vector> & { return data_; } diff --git a/pennylane_lightning/src/tests/TestHelpers.hpp b/pennylane_lightning/src/tests/TestHelpers.hpp index ab383909d4..5078d68428 100644 --- 
a/pennylane_lightning/src/tests/TestHelpers.hpp +++ b/pennylane_lightning/src/tests/TestHelpers.hpp @@ -151,9 +151,10 @@ isApproxEqual(const Data_t &data1, const Data_t &data2, template constexpr static auto test_allocator = - AlignedAllocator{Util::common_alignment_v}; + Util::AlignedAllocator{Util::common_alignment_v}; -template using TestVector = std::vector>; +template +using TestVector = std::vector>; /** * @brief Multiplies every value in a dataset by a given complex scalar value. diff --git a/pennylane_lightning/src/util/Memory.hpp b/pennylane_lightning/src/util/Memory.hpp index e3779e42f7..9d55b20679 100644 --- a/pennylane_lightning/src/util/Memory.hpp +++ b/pennylane_lightning/src/util/Memory.hpp @@ -22,7 +22,7 @@ #include "BitUtil.hpp" #include "TypeList.hpp" -namespace Pennylane { +namespace Pennylane::Util { /** * @brief Custom aligned allocate function. As appleclang does not support * std::aligned_alloc in Mac OS 10.14, we use posix_memalign function. @@ -161,16 +161,6 @@ bool operator!=([[maybe_unused]] const AlignedAllocator &lhs, } ///@cond DEV -template struct commonAlignmentHelper { - constexpr static uint32_t value = - std::max(TypeList::Type::packed_bytes, - commonAlignmentHelper::value); -}; -template <> struct commonAlignmentHelper { - constexpr static uint32_t value = 4U; -}; -///@endcond - /** * @brief This function calculate the common multiplier of alignments of the * given kernels in TypeList. @@ -180,7 +170,17 @@ template <> struct commonAlignmentHelper { * * @tparam TypeList Type list of kernels. 
*/ -template -[[maybe_unused]] constexpr static size_t common_alignment = - commonAlignmentHelper::value; -} // namespace Pennylane +template struct commonAlignmentHelper { + constexpr static size_t value = std::max( + TypeList::Type::template required_alignment, + commonAlignmentHelper::value); +}; +template struct commonAlignmentHelper { + constexpr static size_t value = 1; +}; +/// @endcond +template +[[maybe_unused]] constexpr static auto common_alignment_v = + commonAlignmentHelper::value; + +} // namespace Pennylane::Util diff --git a/pennylane_lightning/src/util/Util.hpp b/pennylane_lightning/src/util/Util.hpp index a906c6fec2..03a3455967 100644 --- a/pennylane_lightning/src/util/Util.hpp +++ b/pennylane_lightning/src/util/Util.hpp @@ -436,30 +436,4 @@ template void for_each_enum(Func &&func) { } } } - -/** - * @brief Get common alignment of given kernels - * - * @tparam PrecisionT Floating point type - * @tparam TypeList Type list of kernels to calculate common alignment - */ -template struct common_alignment { - constexpr static size_t value = - std::max(TypeList::Type::template required_alignment, - common_alignment::value); -}; - -/// @cond DEV -template struct common_alignment { - constexpr static size_t value = std::alignment_of_v; -}; -/// @endcond - -/** - * @brief A value alias for common_alignment - */ -template -[[maybe_unused]] constexpr static size_t common_alignment_v = - common_alignment::value; - } // namespace Pennylane::Util From df068a8eaaf4482958fda5311e3eec86d4abef36 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Wed, 9 Mar 2022 16:47:37 -0500 Subject: [PATCH 51/94] Fix some docs --- doc/add_kernel.rst | 57 +++++++++---------- .../src/simulator/DynamicDispatcher.hpp | 20 +++++-- pennylane_lightning/src/util/Memory.hpp | 20 ++++--- 3 files changed, 55 insertions(+), 42 deletions(-) diff --git a/doc/add_kernel.rst b/doc/add_kernel.rst index 821f99a64c..2b8d99cc8e 100644 --- a/doc/add_kernel.rst +++ b/doc/add_kernel.rst @@ -13,14 
+13,13 @@ We discuss how one can add another gate implementation in this document. Assume constexpr static std::array implemented_gates = { GateOperation::PauliX }; // List of implemented gates - constexpr static kernel_id = KernelType::Mykernel; // Will be discussed below + constexpr static kernel_id = KernelType::MyKernel; // Will be discussed below constexpr static std::string_view = "MyGateImpl"; // Name of your kernel + /* This defines the required alignment for this kernel. If there is no special requirement, + using std::alignment_of_v is sufficient. */ template - constexpr static size_t required_alignment = - std::alignment_of_v; - template - constexpr static size_t packed_bytes = sizeof(PrecisionT); + constexpr static size_t required_alignment = std::alignment_of_v; template static void applyPauliX(std::complex* data, @@ -51,8 +50,9 @@ and // file: simulator/AvailableKernels.hpp namespace Pennylane { using AvailableKernels = Util::TypeList; + GateImplementationsPI, + MyGateImplementation /* This is added*/, + void>; } // namespace Pennylane @@ -68,48 +68,47 @@ Now you can call your kernel functions in C++. // call using the dynamic dispatcher sv.applyOperation(KernelType::MyKernel, "PauliX", /*wires=*/{0}, /*inverse=*/false); -To export your gate implementation to python, you also need to add your kernel to ``kernels_to_pyexport``: +Still, note that your gate implementation is not a default implementation for ``PauliX`` gate yet, i.e., .. code-block:: cpp - // file: simulator/KernelType.hpp - [[maybe_unused]] constexpr std::array kernels_to_pyexport = { - KernelType::PI, KernelType::LM, KernelType::Mykernel /* This is added */ - }; + sv.applyOperation("PauliX", {0}, false) // still call the default implementation -Then you can find ``PauliX_MyKernel`` function in ``lightning_qubit_ops`` Python module. 
- -Still, note that your gate implementation is not a default implementation for ``PauliX`` gate yet, i.e., +To make your gate implementation default, you need to change registered ``KernelMap``. +Thus changing the following lines .. code-block:: cpp - sv.applyPauliX({0}, false); // still call the default implementation - sv.applyOperation("PauliX", {0}, false) // still call the default implementation + // simulator/Kernel.cpp -To make your gate implementation default, you need to change ``default_kernel_for_ops`` constant. Thus changing + int assignDefaultKernelsForGateOp() { + auto &instance = OperationKernelMap::getInstance(); -.. code-block:: cpp + instance.assignKernelForOp(GateOperation::PauliX, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::LM); - // file: simulator/Constant.hpp - constexpr std::array default_kernel_for_gates = { - std::pair{GateOperations::PauliX, KernelType::LM}, - std::pair{GateOperations::PauliY, KernelType::LM}, ... } -to +to .. code-block:: cpp - constexpr std::array default_kernel_for_gates = { - std::pair{GateOperations::PauliX, KernelType::MyKernel}, - std::pair{GateOperations::PauliY, KernelType::LM}, + int assignDefaultKernelsForGateOp() { + auto &instance = OperationKernelMap::getInstance(); + + instance.assignKernelForOp(GateOperation::PauliX, all_threading, + all_memory_model, all_qubit_numbers, + Gates::KernelType::MyKernel); + ... } will make your implementation as default kernel for ``PauliX`` gate (for all C++ calls as well as for the Python binding). -Gate generators can also be handled in the same way. +Gate generators can also be handled in the same way. Note that it is possible to assign the kernel only for specific memory model or +threading operations. Check overloaded functions :cpp:func:`Pennylane::KernelMap::OperationKernelMap::assignKernelForOp` for details. 
Test your gate implementation ============================= @@ -120,7 +119,7 @@ To test your own kernel implementations, you can go to ``tests/TestKernels.hpp`` using TestKernels = Pennylane::Util::TypeList; + MyGateImplementation /*This is added */, void>; It will automatically test your gate implementation. Note that, in the current implementation, this will test a gate if ``apply + gate name`` is defined even when the gate is not included in ``implemented_gates`` variable. diff --git a/pennylane_lightning/src/simulator/DynamicDispatcher.hpp b/pennylane_lightning/src/simulator/DynamicDispatcher.hpp index 0929528325..91dc499194 100644 --- a/pennylane_lightning/src/simulator/DynamicDispatcher.hpp +++ b/pennylane_lightning/src/simulator/DynamicDispatcher.hpp @@ -130,11 +130,21 @@ template class DynamicDispatcher { return singleton; } + /** + * @brief Gate name to gate operation + * + * @param gate_name Gate name + */ [[nodiscard]] auto strToGateOp(const std::string &gate_name) const -> Gates::GateOperation { return str_to_gates_.at(gate_name); } + /** + * @brief Generator name to generator operation + * + * @param gntr_name Generator name + */ [[nodiscard]] auto strToGeneratorOp(const std::string &gntr_name) const -> Gates::GeneratorOperation { return str_to_gntrs_.at(gntr_name); @@ -286,7 +296,8 @@ template class DynamicDispatcher { /** * @brief Apply a given matrix directly to the statevector. * - * @param arr Pointer to the statevector. + * @param kernel Kernel to use for this operation + * @param data Pointer to the statevector. * @param num_qubits Number of qubits. * @param matrix Perfect square matrix in row-major order. * @param wires Wires the gate applies to. @@ -323,14 +334,15 @@ template class DynamicDispatcher { /** * @brief Apply a given matrix directly to the statevector. * - * @param arr Pointer to the statevector. + * @param kernel Kernel to use for this operation + * @param data Pointer to the statevector. * @param num_qubits Number of qubits. 
* @param matrix Perfect square matrix in row-major order. * @param wires Wires the gate applies to. * @param inverse Indicate whether inverse should be taken. */ void applyMatrix(Gates::KernelType kernel, CFP_t *data, size_t num_qubits, - const std::complex &matrix, + const std::vector> &matrix, const std::vector &wires, bool inverse) const { if (matrix.size() != Util::exp2(2 * wires.size())) { throw std::invalid_argument( @@ -347,7 +359,7 @@ template class DynamicDispatcher { * @param kernel Kernel to run the gate operation. * @param data Pointer to data. * @param num_qubits Number of qubits. - * @param op_name Gate operation name. + * @param gntr_op Generator operation. * @param wires Wires to apply gate to. * @param adj Indicates whether to use adjoint of gate. */ diff --git a/pennylane_lightning/src/util/Memory.hpp b/pennylane_lightning/src/util/Memory.hpp index 9d55b20679..65294b07a0 100644 --- a/pennylane_lightning/src/util/Memory.hpp +++ b/pennylane_lightning/src/util/Memory.hpp @@ -161,15 +161,6 @@ bool operator!=([[maybe_unused]] const AlignedAllocator &lhs, } ///@cond DEV -/** - * @brief This function calculate the common multiplier of alignments of the - * given kernels in TypeList. - * - * As all alignment must be a power of 2, we just can choose the maximum - * alignment. - * - * @tparam TypeList Type list of kernels. - */ template struct commonAlignmentHelper { constexpr static size_t value = std::max( TypeList::Type::template required_alignment, @@ -179,6 +170,17 @@ template struct commonAlignmentHelper { constexpr static size_t value = 1; }; /// @endcond + +/** + * @brief This function calculate the common multiplier of alignments of the + * given kernels in TypeList. + * + * As all alignment must be a power of 2, we just can choose the maximum + * alignment. + * + * @tparam PrecisionT Floating point type + * @tparam TypeList Type list of kernels. 
+ */ template [[maybe_unused]] constexpr static auto common_alignment_v = commonAlignmentHelper::value; From a6c629ce5335ad4d32dd74080eab112c5caa4754 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Wed, 9 Mar 2022 17:48:18 -0500 Subject: [PATCH 52/94] Fix for tidy --- CMakeLists.txt | 1 + .../src/tests/CompareVector.hpp | 18 ++++++++++++++++++ pennylane_lightning/src/tests/TestHelpers.hpp | 18 ++---------------- .../Test_GateImplementations_Nonparam.cpp | 1 + 4 files changed, 22 insertions(+), 16 deletions(-) create mode 100644 pennylane_lightning/src/tests/CompareVector.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 58b4faab21..89f665e31e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -42,6 +42,7 @@ option(ENABLE_BLAS "Enable BLAS" OFF) option(BUILD_TESTS "Build cpp tests" OFF) option(BUILD_EXAMPLES "Build cpp examples" OFF) + # Process compile options include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/process_options.cmake") diff --git a/pennylane_lightning/src/tests/CompareVector.hpp b/pennylane_lightning/src/tests/CompareVector.hpp new file mode 100644 index 0000000000..454e71e535 --- /dev/null +++ b/pennylane_lightning/src/tests/CompareVector.hpp @@ -0,0 +1,18 @@ +#pragma once + +#include +#include + +template +bool operator==(const std::vector &lhs, + const std::vector &rhs) { + if (lhs.size() != rhs.size()) { + return false; + } + for (size_t idx = 0; idx < lhs.size(); idx++) { + if (lhs[idx] != rhs[idx]) { + return false; + } + } + return true; +} diff --git a/pennylane_lightning/src/tests/TestHelpers.hpp b/pennylane_lightning/src/tests/TestHelpers.hpp index 5078d68428..68464ee8c4 100644 --- a/pennylane_lightning/src/tests/TestHelpers.hpp +++ b/pennylane_lightning/src/tests/TestHelpers.hpp @@ -8,6 +8,8 @@ #include "TestKernels.hpp" #include "Util.hpp" +#include + #include #include #include @@ -15,8 +17,6 @@ #include #include -#include - namespace Pennylane { template struct remove_complex { using type = T; }; template struct remove_complex> { @@ 
-97,20 +97,6 @@ bool operator!=(const std::vector &lhs, return !rhs.compare(lhs); } -template -bool operator==(const std::vector &lhs, - const std::vector &rhs) { - if (lhs.size() != rhs.size()) { - return false; - } - for (size_t idx = 0; idx < lhs.size(); idx++) { - if (lhs[idx] != rhs[idx]) { - return false; - } - } - return true; -} - /** * @brief Utility function to compare complex statevector data. * diff --git a/pennylane_lightning/src/tests/Test_GateImplementations_Nonparam.cpp b/pennylane_lightning/src/tests/Test_GateImplementations_Nonparam.cpp index 2dde03af2b..a44752e0a3 100644 --- a/pennylane_lightning/src/tests/Test_GateImplementations_Nonparam.cpp +++ b/pennylane_lightning/src/tests/Test_GateImplementations_Nonparam.cpp @@ -1,3 +1,4 @@ +#include "CompareVector.hpp" #include "TestHelpers.hpp" #include "TestKernels.hpp" #include "Util.hpp" From b6569dc5a90d4ae22012812d77d9f24afe4c3500 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Wed, 9 Mar 2022 19:26:04 -0500 Subject: [PATCH 53/94] Update Util; Update doc --- .github/workflows/format.yml | 2 +- bin/cpp-files | 7 +- bin/utils.py | 15 +- doc/_ext/edit_on_github.py | 27 ++-- doc/conf.py | 94 +++++------ doc/directives.py | 28 ++-- pennylane_lightning/src/util/CMakeLists.txt | 8 +- pennylane_lightning/src/util/ConstantUtil.hpp | 11 +- .../src/util/LinearAlgebra.hpp | 106 ++++++++++--- pennylane_lightning/src/util/Macros.hpp | 147 +++++++++++++++++- pennylane_lightning/src/util/RuntimeInfo.cpp | 71 +++++++++ pennylane_lightning/src/util/RuntimeInfo.hpp | 52 +++++++ pennylane_lightning/src/util/TypeList.hpp | 52 ++++++- pennylane_lightning/src/util/Util.hpp | 3 +- tests/test_measures.py | 18 ++- 15 files changed, 524 insertions(+), 117 deletions(-) create mode 100644 pennylane_lightning/src/util/RuntimeInfo.cpp create mode 100644 pennylane_lightning/src/util/RuntimeInfo.hpp diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml index 347f111cae..b4a91f94a5 100644 --- 
a/.github/workflows/format.yml +++ b/.github/workflows/format.yml @@ -45,7 +45,7 @@ jobs: python-version: 3.7 - name: Install dependencies - run: sudo apt update && sudo apt -y install clang-tidy-12 cmake g++ + run: sudo apt update && sudo apt -y install clang-tidy-12 cmake g++ libomp-12-dev env: DEBIAN_FRONTEND: noninteractive diff --git a/bin/cpp-files b/bin/cpp-files index b09cc88cf1..7ccd202783 100755 --- a/bin/cpp-files +++ b/bin/cpp-files @@ -14,6 +14,9 @@ if __name__ == '__main__': parser = argparse.ArgumentParser( description="Output C/C++ files in json list" ) + parser.add_argument( + "--header-only", action='store_true', dest='header_only', help="whether only include header files" + ) parser.add_argument( "paths", nargs="+", metavar="DIR", help="paths to the root source directories" ) @@ -23,9 +26,9 @@ if __name__ == '__main__': args = parser.parse_args() - files = set(get_cpp_files(args.paths)) + files = set(get_cpp_files(args.paths, header_only = args.header_only)) if args.exclude_dirs: - files_excludes = set(get_cpp_files(args.exclude_dirs)) + files_excludes = set(get_cpp_files(args.exclude_dirs, header_only = args.header_only)) files -= files_excludes json.dump(list(files), sys.stdout) diff --git a/bin/utils.py b/bin/utils.py index 90d1693031..6d9dab9420 100644 --- a/bin/utils.py +++ b/bin/utils.py @@ -2,13 +2,14 @@ import re import fnmatch -SRCFILE_EXT = ("c", "cc", "cpp", "cxx", "h", "hh", "hpp", "hxx", "cu", "cuh") +SRCFILE_EXT = ["c", "cc", "cpp", "cxx", "cu"] +HEADERFILE_EXT = ["h", "hh", "hpp", "hxx", "cuh"] LIGHTNING_SOURCE_DIR = Path(__file__).resolve().parent.parent rgx_gitignore_comment = re.compile("#.*$") -def get_cpp_files_from_path(path, ignore_patterns = None, use_gitignore = True): +def get_cpp_files_from_path(path, ignore_patterns = None, use_gitignore = True, header_only = False): """return set of C++ source files from a path Args: @@ -18,7 +19,11 @@ def get_cpp_files_from_path(path, ignore_patterns = None, use_gitignore = True): """ 
path = Path(path) files_rel = set() # file paths relative to path - for ext in SRCFILE_EXT: + + exts = HEADERFILE_EXT + if not header_only: + exts += SRCFILE_EXT + for ext in exts: for file_path in path.rglob(f"*.{ext}"): files_rel.add(file_path.relative_to(path)) @@ -46,7 +51,7 @@ def get_cpp_files_from_path(path, ignore_patterns = None, use_gitignore = True): return set(str(path.joinpath(f)) for f in files_rel) -def get_cpp_files(paths, ignore_patterns = None, use_gitignore = True): +def get_cpp_files(paths, ignore_patterns = None, use_gitignore = True, header_only = False): """return list of C++ source files from paths. Args: @@ -56,5 +61,5 @@ def get_cpp_files(paths, ignore_patterns = None, use_gitignore = True): """ files = set() for path in paths: - files |= get_cpp_files_from_path(path, ignore_patterns, use_gitignore) + files |= get_cpp_files_from_path(path, ignore_patterns, use_gitignore, header_only) return list(files) diff --git a/doc/_ext/edit_on_github.py b/doc/_ext/edit_on_github.py index b69348d97d..954ed00ab4 100644 --- a/doc/_ext/edit_on_github.py +++ b/doc/_ext/edit_on_github.py @@ -8,19 +8,20 @@ import warnings -__licence__ = 'BSD (3 clause)' +__licence__ = "BSD (3 clause)" def get_github_url(app, view, path): - return 'https://github.com/{project}/{view}/{branch}/{path}'.format( + return "https://github.com/{project}/{view}/{branch}/{path}".format( project=app.config.edit_on_github_project, view=view, branch=app.config.edit_on_github_branch, - path=path) + path=path, + ) def html_page_context(app, pagename, templatename, context, doctree): - if templatename != 'page.html': + if templatename != "page.html": return if not app.config.edit_on_github_project: @@ -29,16 +30,16 @@ def html_page_context(app, pagename, templatename, context, doctree): if not doctree: return - - path = os.path.relpath(doctree.get('source'), app.builder.srcdir) - show_url = get_github_url(app, 'blob', path) - edit_url = get_github_url(app, 'edit', path) - 
context['show_on_github_url'] = show_url - context['edit_on_github_url'] = edit_url + path = os.path.relpath(doctree.get("source"), app.builder.srcdir) + show_url = get_github_url(app, "blob", path) + edit_url = get_github_url(app, "edit", path) + + context["show_on_github_url"] = show_url + context["edit_on_github_url"] = edit_url def setup(app): - app.add_config_value('edit_on_github_project', '', True) - app.add_config_value('edit_on_github_branch', 'master', True) - app.connect('html-page-context', html_page_context) \ No newline at end of file + app.add_config_value("edit_on_github_project", "", True) + app.add_config_value("edit_on_github_branch", "master", True) + app.connect("html-page-context", html_page_context) diff --git a/doc/conf.py b/doc/conf.py index 770f5434dc..37a6be4452 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -20,46 +20,50 @@ # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. 
-sys.path.insert(0, os.path.abspath('')) -sys.path.insert(0, os.path.abspath('_ext')) -sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath('doc')), 'doc')) +sys.path.insert(0, os.path.abspath("")) +sys.path.insert(0, os.path.abspath("_ext")) +sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath("doc")), "doc")) # For obtaining all relevant C++ source files -currdir = Path(__file__).resolve().parent # PROJECT_SOURCE_DIR/docs +currdir = Path(__file__).resolve().parent # PROJECT_SOURCE_DIR/docs PROJECT_SOURCE_DIR = currdir.parent -CPP_SOURCE_DIR = PROJECT_SOURCE_DIR.joinpath('pennylane_lightning/src') -CPP_EXCLUDE_DIRS = ['examples', 'tests'] # relative to CPP_SOURCE_DIR +CPP_SOURCE_DIR = PROJECT_SOURCE_DIR.joinpath("pennylane_lightning/src") +CPP_EXCLUDE_DIRS = ["examples", "tests"] # relative to CPP_SOURCE_DIR + def obtain_cpp_files(): - script_path = PROJECT_SOURCE_DIR.joinpath('bin/cpp-files') + script_path = PROJECT_SOURCE_DIR.joinpath("bin/cpp-files") if not script_path.exists(): - print('The project directory structure is corrupted.') + print("The project directory structure is corrupted.") sys.exit(1) exclude_dirs = [CPP_SOURCE_DIR.joinpath(exclude_dir) for exclude_dir in CPP_EXCLUDE_DIRS] - p = subprocess.run([str(script_path), CPP_SOURCE_DIR, '--exclude-dirs', *exclude_dirs], capture_output = True) + p = subprocess.run( + [str(script_path), "--header-only", CPP_SOURCE_DIR, "--exclude-dirs", *exclude_dirs], + capture_output=True, + ) file_list = json.loads(p.stdout) - file_list = ['../' + str(Path(f).relative_to(PROJECT_SOURCE_DIR)) for f in file_list] + file_list = ["../" + str(Path(f).relative_to(PROJECT_SOURCE_DIR)) for f in file_list] return file_list + CPP_FILES = obtain_cpp_files() print(CPP_FILES) - class Mock(MagicMock): - __name__ = 'foo' + __name__ = "foo" @classmethod def __getattr__(cls, name): return MagicMock() -MOCK_MODULES = ['pennylane_lightning.lightning_qubit_ops'] +MOCK_MODULES = 
["pennylane_lightning.lightning_qubit_ops"] mock = Mock() for mod_name in MOCK_MODULES: @@ -68,7 +72,7 @@ def __getattr__(cls, name): # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. -needs_sphinx = '1.6' +needs_sphinx = "1.6" # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom @@ -85,9 +89,9 @@ def __getattr__(cls, name): "sphinx.ext.mathjax", "sphinx.ext.napoleon", "sphinx.ext.todo", - 'sphinx.ext.viewcode', + "sphinx.ext.viewcode", "sphinx_automodapi.automodapi", - 'sphinx_automodapi.smart_resolver' + "sphinx_automodapi.smart_resolver", ] intersphinx_mapping = {"https://pennylane.readthedocs.io/en/stable/": None} @@ -114,10 +118,7 @@ def __getattr__(cls, name): # TIP: if using the sphinx-bootstrap-theme, you need # "treeViewIsBootstrap": True, "exhaleExecutesDoxygen": True, - "exhaleDoxygenStdin": ( - "INPUT = " + ' '.join(CPP_FILES) + ' ' - "EXCLUDE_SYMBOLS = std::* " - ), + "exhaleDoxygenStdin": ("INPUT = " + " ".join(CPP_FILES) + " " "EXCLUDE_SYMBOLS = std::* "), "afterTitleDescription": inspect.cleandoc( """ The Pennylane Lightning C++ API is intended to be called from Python through Pybind11. Direct use of the C++ API is currently unsupported and is provided for reference only. @@ -126,21 +127,21 @@ def __getattr__(cls, name): } # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates', 'xanadu_theme'] +templates_path = ["_templates", "xanadu_theme"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] -source_suffix = '.rst' +source_suffix = ".rst" # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. 
-project = 'PennyLane-Lightning' +project = "PennyLane-Lightning" copyright = "Copyright 2021" -author = 'Xanadu Inc.' +author = "Xanadu Inc." add_module_names = False @@ -149,11 +150,12 @@ def __getattr__(cls, name): # built documents. import pennylane_lightning + # The full version, including alpha/beta/rc tags. release = pennylane_lightning.__version__ # The short X.Y version. -version = re.match(r'^(\d+\.\d+)', release).expand(r'\1') +version = re.match(r"^(\d+\.\d+)", release).expand(r"\1") # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -163,19 +165,19 @@ def __getattr__(cls, name): language = None # today_fmt is used as the format for a strftime call. -today_fmt = '%Y-%m-%d' +today_fmt = "%Y-%m-%d" # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This patterns also effect to html_static_path and html_extra_path -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. show_authors = True # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = True @@ -186,12 +188,12 @@ def __getattr__(cls, name): # The name of an image file (relative to this directory) to use as a favicon of # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -html_favicon = '_static/favicon.ico' +html_favicon = "_static/favicon.ico" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". 
-html_static_path = ['_static'] +html_static_path = ["_static"] # Custom sidebar templates, must be a dictionary that maps document names # to template names. @@ -199,26 +201,24 @@ def __getattr__(cls, name): # This is required for the alabaster theme # refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars html_sidebars = { - '**' : [ - 'logo-text.html', - 'searchbox.html', - 'globaltoc.html', + "**": [ + "logo-text.html", + "searchbox.html", + "globaltoc.html", ] } # -- Xanadu theme --------------------------------------------------------- -html_theme = 'xanadu_theme' -html_theme_path = ['.'] +html_theme = "xanadu_theme" +html_theme_path = ["."] # xanadu theme options (see theme.conf for more information) html_theme_options = { # Set the name of the project to appear in the left sidebar. "project_nav_name": "PennyLane-Lightning", - # Path to a touch icon "touch_icon": "logo_new.png", - "large_toc": True, "navigation_button": "#19b37b", "navigation_button_hover": "#0e714d", @@ -229,22 +229,22 @@ def __getattr__(cls, name): "download_button": "#19b37b", } -edit_on_github_project = 'XanaduAI/pennylane-lightning' -edit_on_github_branch = 'master/doc' +edit_on_github_project = "XanaduAI/pennylane-lightning" +edit_on_github_branch = "master/doc" -#============================================================ +# ============================================================ # the order in which autodoc lists the documented members -autodoc_member_order = 'bysource' +autodoc_member_order = "bysource" # inheritance_diagram graphviz attributes -inheritance_node_attrs = dict(color='lightskyblue1', style='filled') +inheritance_node_attrs = dict(color="lightskyblue1", style="filled") -#autodoc_default_flags = ['members'] +# autodoc_default_flags = ['members'] autosummary_generate = True from directives import CustomDeviceGalleryItemDirective -def setup(app): - app.add_directive('devicegalleryitem', CustomDeviceGalleryItemDirective) +def setup(app): + 
app.add_directive("devicegalleryitem", CustomDeviceGalleryItemDirective) diff --git a/doc/directives.py b/doc/directives.py index 953c5d38ba..3dfe1cc5d7 100644 --- a/doc/directives.py +++ b/doc/directives.py @@ -49,25 +49,27 @@ class CustomDeviceGalleryItemDirective(Directive): required_arguments = 0 optional_arguments = 4 final_argument_whitespace = True - option_spec = {'name': directives.unchanged, - 'description': directives.unchanged, - 'link': directives.unchanged} + option_spec = { + "name": directives.unchanged, + "description": directives.unchanged, + "link": directives.unchanged, + } has_content = False add_index = False def run(self): try: - if 'name' in self.options: - name = self.options['name'] + if "name" in self.options: + name = self.options["name"] - if 'description' in self.options: - description = self.options['description'] + if "description" in self.options: + description = self.options["description"] else: - raise ValueError('description not found') + raise ValueError("description not found") - if 'link' in self.options: - link = self.options['link'] + if "link" in self.options: + link = self.options["link"] else: link = "code/qml_templates" @@ -79,10 +81,8 @@ def run(self): raise return [] - thumbnail_rst = GALLERY_TEMPLATE.format(name=name, - description=description, - link=link) - thumbnail = StringList(thumbnail_rst.split('\n')) + thumbnail_rst = GALLERY_TEMPLATE.format(name=name, description=description, link=link) + thumbnail = StringList(thumbnail_rst.split("\n")) thumb = nodes.paragraph() self.state.nested_parse(thumbnail, self.content_offset, thumb) return [thumb] diff --git a/pennylane_lightning/src/util/CMakeLists.txt b/pennylane_lightning/src/util/CMakeLists.txt index 20e75282f5..36b51f00e6 100644 --- a/pennylane_lightning/src/util/CMakeLists.txt +++ b/pennylane_lightning/src/util/CMakeLists.txt @@ -1,7 +1,11 @@ project(lightning_utils LANGUAGES CXX) set(CMAKE_CXX_STANDARD 17) -add_library(lightning_utils INTERFACE) 
+set(UTIL_FILES RuntimeInfo.cpp CACHE INTERNAL "" FORCE) + +add_library(lightning_utils STATIC ${UTIL_FILES}) target_include_directories(lightning_utils INTERFACE $ $ -) \ No newline at end of file +) + +set_property(TARGET lightning_utils PROPERTY POSITION_INDEPENDENT_CODE ON) diff --git a/pennylane_lightning/src/util/ConstantUtil.hpp b/pennylane_lightning/src/util/ConstantUtil.hpp index 532b49ee01..d3995e7642 100644 --- a/pennylane_lightning/src/util/ConstantUtil.hpp +++ b/pennylane_lightning/src/util/ConstantUtil.hpp @@ -42,7 +42,7 @@ constexpr auto lookup(const std::array, size> &arr, } } throw std::range_error("The given key does not exist."); -}; +} /** * @brief Check an array has an element. @@ -61,7 +61,7 @@ constexpr auto array_has_elt(const std::array &arr, const U &elt) } } return false; -}; +} /** * @brief Extract first elements from the array of pairs. @@ -207,4 +207,11 @@ constexpr auto reverse_pairs(const std::array, size> &arr) return Internal::reverse_pairs_helper(arr, std::make_index_sequence{}); } + +constexpr auto constIsPerfectPowerOf2(size_t value) -> bool { + while ((value & 1U) == 0) { + value >>= 1U; + } + return value == 1; +} } // namespace Pennylane::Util diff --git a/pennylane_lightning/src/util/LinearAlgebra.hpp b/pennylane_lightning/src/util/LinearAlgebra.hpp index 40ea4292a8..1cf36e7be9 100644 --- a/pennylane_lightning/src/util/LinearAlgebra.hpp +++ b/pennylane_lightning/src/util/LinearAlgebra.hpp @@ -17,9 +17,11 @@ */ #pragma once +#include #include #include #include +#include #include #include "Util.hpp" @@ -48,13 +50,13 @@ using CBLAS_LAYOUT = enum CBLAS_LAYOUT { /// @endcond // +namespace Pennylane::Util { enum class Trans : int { NoTranspose = CblasNoTrans, Transpose = CblasTrans, Adjoint = CblasConjTrans }; -namespace Pennylane::Util { /** * @brief Calculates the inner-product using OpenMP. 
* @@ -210,9 +212,9 @@ inline auto innerProdC(const std::complex *v1, const std::complex *v2, * @see innerProd(const std::complex *v1, const std::complex *v2, * const size_t data_size) */ -template -inline auto innerProd(const std::vector> &v1, - const std::vector> &v2) +template +inline auto innerProd(const std::vector, AllocA> &v1, + const std::vector, AllocB> &v2) -> std::complex { return innerProd(v1.data(), v2.data(), v1.size()); } @@ -224,9 +226,9 @@ inline auto innerProd(const std::vector> &v1, * @see innerProdC(const std::complex *v1, const std::complex *v2, * const size_t data_size) */ -template -inline auto innerProdC(const std::vector> &v1, - const std::vector> &v2) +template +inline auto innerProdC(const std::vector, AllocA> &v1, + const std::vector, AllocB> &v2) -> std::complex { return innerProdC(v1.data(), v2.data(), v1.size()); } @@ -461,15 +463,15 @@ inline static void CFTranspose(const std::complex *mat, * @param n Number of columns of `mat`. * @return mat transpose of shape n * m. */ -template -inline auto Transpose(const std::vector> &mat, size_t m, - size_t n) -> std::vector> { +template +inline auto Transpose(const std::vector, Alloc> &mat, size_t m, + size_t n) -> std::vector, Alloc> { if (mat.size() != m * n) { throw std::invalid_argument( "Invalid number of rows and columns for the input matrix"); } - std::vector> mat_t(n * m); + std::vector, Alloc> mat_t(n * m, mat.get_allocator()); CFTranspose(mat.data(), mat_t.data(), m, n, 0, m, 0, n); return mat_t; } @@ -484,15 +486,15 @@ inline auto Transpose(const std::vector> &mat, size_t m, * @param n Number of columns of `mat`. * @return mat transpose of shape n * m. 
*/ -template -inline auto Transpose(const std::vector &mat, size_t m, size_t n) - -> std::vector { +template +inline auto Transpose(const std::vector &mat, size_t m, size_t n) + -> std::vector { if (mat.size() != m * n) { throw std::invalid_argument( "Invalid number of rows and columns for the input matrix"); } - std::vector mat_t(n * m); + std::vector mat_t(n * m, mat.get_allocator()); CFTranspose(mat.data(), mat_t.data(), m, n, 0, m, 0, n); return mat_t; } @@ -548,9 +550,10 @@ inline void vecMatrixProd(const T *v_in, const T *mat, T *v_out, size_t m, * @see inline void vecMatrixProd(const T *v_in, * const T *mat, T *v_out, size_t m, size_t n) */ -template -inline auto vecMatrixProd(const std::vector &v_in, const std::vector &mat, - size_t m, size_t n) -> std::vector { +template +inline auto vecMatrixProd(const std::vector &v_in, + const std::vector &mat, size_t m, size_t n) + -> std::vector { if (v_in.size() != m) { throw std::invalid_argument("Invalid size for the input vector"); } @@ -559,7 +562,7 @@ inline auto vecMatrixProd(const std::vector &v_in, const std::vector &mat, "Invalid number of rows and columns for the input matrix"); } - std::vector v_out(n); + std::vector v_out(n, mat.get_allocator()); vecMatrixProd(v_in.data(), mat.data(), v_out.data(), m, n); return v_out; @@ -745,4 +748,69 @@ inline auto matrixMatProd(const std::vector> m_left, return m_out; } + +/** + * @brief Calculate the squared norm of a vector + */ +template +auto squaredNorm(const std::complex *data, size_t data_size) + -> PrecisionT { + return std::transform_reduce( + data, data + data_size, PrecisionT{}, std::plus(), + static_cast &)>( + &std::norm)); +} + +/** + * @brief Generate random unitary matrix + * + * @return Generated unitary matrix in row-major format + */ +template +auto randomUnitary(RandomEngine &re, size_t num_qubits) + -> std::vector> { + using ComplexPrecisionT = std::complex; + const size_t dim = (1U << num_qubits); + std::vector res(dim * dim, 
ComplexPrecisionT{}); + + std::normal_distribution dist; + + auto generator = [&dist, &re]() -> ComplexPrecisionT { + return ComplexPrecisionT{dist(re), dist(re)}; + }; + + std::generate(res.begin(), res.end(), generator); + + // Simple algorithm to make rows orthogonal with Gram-Schmidt + // This algorithm is unstable but works for a small matrix. + // Use QR decomposition when we have LAPACK support. + + for (size_t row2 = 0; row2 < dim; row2++) { + ComplexPrecisionT *row2_p = res.data() + row2 * dim; + for (size_t row1 = 0; row1 < row2; row1++) { + const ComplexPrecisionT *row1_p = res.data() + row1 * dim; + ComplexPrecisionT dot12 = Util::innerProdC(row1_p, row2_p, dim); + ComplexPrecisionT dot11 = squaredNorm(row1_p, dim); + + // orthogonalize row2 + std::transform( + row2_p, row2_p + dim, row1_p, row2_p, + [scale = dot12 / dot11](auto &elt2, const auto &elt1) { + return elt2 - scale * elt1; + }); + } + } + + // Normalize each row + for (size_t row = 0; row < dim; row++) { + ComplexPrecisionT *row_p = res.data() + row * dim; + PrecisionT norm2 = std::sqrt(squaredNorm(row_p, dim)); + + // normalize row2 + std::transform(row_p, row_p + dim, row_p, [norm2](const auto c) { + return (static_cast(1.0) / norm2) * c; + }); + } + return res; +} } // namespace Pennylane::Util diff --git a/pennylane_lightning/src/util/Macros.hpp b/pennylane_lightning/src/util/Macros.hpp index 1b60d1e076..a8cb8c1d7d 100644 --- a/pennylane_lightning/src/util/Macros.hpp +++ b/pennylane_lightning/src/util/Macros.hpp @@ -13,12 +13,155 @@ // limitations under the License. /** * @file - * Define some builtin alternatives + * Define macros and compile-time constants. */ #pragma once +#include + +/** + * @brief Predefined macro variable to a string. Use std::format instead in + * C++20. 
+ */ +#define PL_TO_STR_INDIR(x) #x +#define PL_TO_STR(VAR) PL_TO_STR_INDIR(VAR) + #if defined(__GNUC__) || defined(__clang__) #define PL_UNREACHABLE __builtin_unreachable() -#else +#elif defined(_MSC_VER) #define PL_UNREACHABLE __assume(false) +#else // Unsupported compiler +#define PL_UNREACHABLE +#endif + +#if defined(__AVX2__) +#define PL_USE_AVX2 1 +[[maybe_unused]] static constexpr bool use_avx2 = true; +#else +[[maybe_unused]] static constexpr bool use_avx2 = false; +#endif + +#if defined(__AVX512F__) +#define PL_USE_AVX512F 1 +[[maybe_unused]] static constexpr bool use_avx512f = true; +#else +[[maybe_unused]] static constexpr bool use_avx512f = false; +#endif + +#if defined(__AVX512DQ__) +#define PL_USE_AVX512DQ 1 +[[maybe_unused]] static constexpr bool use_avx512dq = true; +#else +[[maybe_unused]] static constexpr bool use_avx512dq = false; +#endif + +#if defined(__AVX512VL__) +#define PL_USE_AVX512VL 1 +[[maybe_unused]] static constexpr bool use_avx512vl = true; +#else +[[maybe_unused]] static constexpr bool use_avx512vl = false; +#endif + +#if defined(_OPENMP) +#define PL_USE_OMP 1 +[[maybe_unused]] static constexpr bool use_openmp = true; +#else +[[maybe_unused]] static constexpr bool use_openmp = false; +#endif + +#if (_OPENMP >= 202011) +#define PL_UNROLL_LOOP __Pragma("omp unroll(8)") +#elif defined(__GNUC__) +#define PL_UNROLL_LOOP _Pragma("GCC unroll 8") +#elif defined(__clang__) +#define PL_UNROLL_LOOP _Pragma("unroll(8)") +#else +#define PL_UNROLL_LOOP +#endif + +// Define force inline +#if defined(__GNUC__) || defined(__clang__) +#if NDEBUG +#define PL_FORCE_INLINE __attribute__((always_inline)) inline +#else +#define PL_FORCE_INLINE +#endif +#elif defined(_MSC_VER) +#if NDEBUG +#define PL_FORCE_INLINE __forceinline +#else +#define PL_FORCE_INLINE +#endif +#else +#if NDEBUG +#define PL_FORCE_INLINE inline +#else +#define PL_FORCE_INLINE +#endif +#endif + +namespace Pennylane::Util::Constant { +enum class CPUArch { AMD64, PPC64, ARM, Unknown }; + 
+constexpr auto getCPUArchClangGCC() { +#if defined(__x86_64__) + return CPUArch::AMD64; +#elif defined(__powerpc64__) + return CPUArch::PPC64; +#elif defined(__arm__) + return CPUArch::ARM; +#else + return CPUArch::Unknown; +#endif +} + +constexpr auto getCPUArchMSVC() { +#if defined(_M_AMD64) + return CPUArch::AMD64; +#elif defined(_M_PPC) + return CPUArch::PPC64; +#elif defined(_M_ARM) + return CPUArch::ARM; +#else + return CPUArch::Unknown; +#endif +} + +#if defined(__GNUC__) || defined(__clang__) +[[maybe_unused]] constexpr static auto cpu_arch = getCPUArchClangGCC(); +#elif defined(_MSC_VER) +[[maybe_unused]] constexpr static auto cpu_arch = getCPUArchMSVC(); +#else +[[maybe_unused]] constexpr static auto cpu_arch = CPUArch::Unknown; +#endif + +enum class Compiler { GCC, Clang, MSVC, Unknown }; + +template +constexpr auto getCompilerVersion() -> std::string_view { + return "Unknown version"; +} +template <> +constexpr auto getCompilerVersion() -> std::string_view { + return PL_TO_STR(__GNUC__) "." PL_TO_STR(__GNUC_MINOR__) "." PL_TO_STR( + __GNUC_PATCHLEVEL__); +} +template <> +constexpr auto getCompilerVersion() -> std::string_view { + return PL_TO_STR(__clang_major__) "." PL_TO_STR( + __clang_minor__) "." 
PL_TO_STR(__clang_patchlevel__); +} +template <> +constexpr auto getCompilerVersion() -> std::string_view { + return PL_TO_STR(_MSC_FULL_VER); +} +#if defined(__GNUC__) && !defined(__llvm__) && !defined(__INTEL_COMPILER) +[[maybe_unused]] constexpr static auto compiler = Compiler::GCC; +#elif defined(__clang__) +[[maybe_unused]] constexpr static auto compiler = Compiler::Clang; +#elif defined(_MSC_VER) +[[maybe_unused]] constexpr static auto compiler = Compiler::MSVC; +#else +[[maybe_unused]] constexpr static auto compiler = Compiler::Unknown; #endif +} // namespace Pennylane::Util::Constant diff --git a/pennylane_lightning/src/util/RuntimeInfo.cpp b/pennylane_lightning/src/util/RuntimeInfo.cpp new file mode 100644 index 0000000000..5a208cb540 --- /dev/null +++ b/pennylane_lightning/src/util/RuntimeInfo.cpp @@ -0,0 +1,71 @@ +// Copyright 2022 Xanadu Quantum Technologies Inc. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include "RuntimeInfo.hpp" + +#include + +#if defined(__GNUC__) || defined(__clang__) +#include +#elif defined(_MSC_VER) +#include +#endif + +namespace Pennylane::Util { +#if defined(__GNUC__) || defined(__clang__) +RuntimeInfo::InternalRuntimeInfo::InternalRuntimeInfo() { + const auto nids = __get_cpuid_max(0x00, nullptr); + if (nids == 0) { + return; // cpuid is not supported + } + + unsigned int eax = 0; + unsigned int ebx = 0; + unsigned int ecx = 0; + unsigned int edx = 0; + if (nids >= 1) { + eax = 1; + __get_cpuid(1, &eax, &ebx, &ecx, &edx); + f_1_ecx = ecx; + f_1_edx = edx; + } + if (nids >= 7) { // NOLINT(readability-magic-numbers) + // NOLINTNEXTLINE(readability-magic-numbers) + __get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx); + f_7_ebx = ebx; + f_7_ecx = ecx; + } +} +#elif defined(_MSC_VER) +RuntimeInfo::InternalRuntimeInfo::InternalRuntimeInfo() { + std::array cpui; + __cpuid(cpui.data(), 0); + + int nids = cpui[0]; + + if (nids >= 1) { + __cpuidex(cpui.data(), 1, 0); + f_1_ecx = cpui[2]; + f_1_edx = cpui[3]; + } + + if (nids >= 7) { + __cpuidex(cpui.data(), 7, 0); + f_7_ebx = cpui[1]; + f_7_ecx = cpui[2]; + } +} +#else +RuntimeInfo::InternalRuntimeInfo::InternalRuntimeInfo(){}; +#endif +} // namespace Pennylane::Util diff --git a/pennylane_lightning/src/util/RuntimeInfo.hpp b/pennylane_lightning/src/util/RuntimeInfo.hpp new file mode 100644 index 0000000000..416422bd45 --- /dev/null +++ b/pennylane_lightning/src/util/RuntimeInfo.hpp @@ -0,0 +1,52 @@ +// Copyright 2022 Xanadu Quantum Technologies Inc. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +/** + * @file + * Runtime information based on cpuid + */ +#pragma once +#include + +namespace Pennylane::Util { +/** + * @brief This class is only usable in x86 or AMD64 architecture. + */ +class RuntimeInfo { + private: + struct InternalRuntimeInfo { + InternalRuntimeInfo(); + + std::bitset<32> f_1_ecx; + std::bitset<32> f_1_edx; + std::bitset<32> f_7_ebx; + std::bitset<32> f_7_ecx; + }; + + static const inline InternalRuntimeInfo internal_runtime_info_; + + public: + static inline bool AVX() { + // NOLINTNEXTLINE(readability-magic-numbers) + return internal_runtime_info_.f_1_ecx[28]; + } + static inline bool AVX2() { + // NOLINTNEXTLINE(readability-magic-numbers) + return internal_runtime_info_.f_7_ebx[5]; + } + static inline bool AVX512F() { + // NOLINTNEXTLINE(readability-magic-numbers) + return internal_runtime_info_.f_7_ebx[16]; + } +}; +} // namespace Pennylane::Util diff --git a/pennylane_lightning/src/util/TypeList.hpp b/pennylane_lightning/src/util/TypeList.hpp index e288bd80a5..a53c3cbd5d 100644 --- a/pennylane_lightning/src/util/TypeList.hpp +++ b/pennylane_lightning/src/util/TypeList.hpp @@ -18,14 +18,19 @@ #pragma once #include +#include #include +#include namespace Pennylane::Util { template struct TypeNode { using Type = T; using Next = TypeNode; }; - +template struct TypeNode { + using Type = T; + using Next = void; +}; template struct TypeNode { using Type = T; using Next = void; @@ -36,16 +41,33 @@ template struct TypeNode { */ template using TypeList = TypeNode; -template struct getNthType { - static_assert(!std::is_same_v, - "The given n is larger than the length of the typelist."); - using Type = getNthType; +/** + * @brief Get N-th type of a type list. 
+ * + * @tparam TypeList Type list + * @tparam n The position of a type to extract + */ +template struct getNth { + using Type = typename getNth::Type; }; -template struct getNthType { +/// @cond DEV +template struct getNth { + static_assert(!std::is_same_v, + "The given n is larger than the length of the type list."); using Type = typename TypeList::Type; }; +/// @endcod + +/** + * @brief Convenient of alias of getNth + */ +template +using getNthType = typename getNth::Type; +/** + * @brief Get the size of a type list + */ template constexpr size_t length() { if constexpr (std::is_same_v) { return 0; @@ -53,4 +75,22 @@ template constexpr size_t length() { return 1 + length(); } } + +/** + * @brief Prepend a type to a type list. + * + * @tparam T Type to prepend + * @tparam U TypeList + */ +template struct PrependToTypeList; + +/// @cond DEV +template +struct PrependToTypeList> { + using Type = TypeNode; +}; +template struct PrependToTypeList { + using Type = TypeNode; +}; +/// @endcond } // namespace Pennylane::Util diff --git a/pennylane_lightning/src/util/Util.hpp b/pennylane_lightning/src/util/Util.hpp index 3b184b82f9..adbe6d9f42 100644 --- a/pennylane_lightning/src/util/Util.hpp +++ b/pennylane_lightning/src/util/Util.hpp @@ -260,10 +260,11 @@ auto linspace(T start, T end, size_t num_points) -> std::vector { * * @tparam T Vector data type. * @param arr Array to be inspected. + * @param length Size of the array * @return a vector with indices that would sort the array. 
*/ template -inline auto sorting_indices(const T &arr, size_t length) +inline auto sorting_indices(const T *arr, size_t length) -> std::vector { std::vector indices(length); iota(indices.begin(), indices.end(), 0); diff --git a/tests/test_measures.py b/tests/test_measures.py index a843253faa..10c48c5313 100644 --- a/tests/test_measures.py +++ b/tests/test_measures.py @@ -16,6 +16,7 @@ """ import numpy as np import pennylane as qml +import math from pennylane.measurements import ( Variance, Expectation, @@ -55,15 +56,20 @@ def dev(self): def test_probs_dtype64(self, dev): """Test if probs changes the state dtype""" - dev._state = np.array([1, 0]).astype(np.complex64) + dev._state = dev._asarray( + np.array([1 / math.sqrt(2), 1 / math.sqrt(2), 0, 0]).astype(np.complex64) + ) p = dev.probability(wires=[0, 1]) assert dev._state.dtype == np.complex64 - assert np.allclose(p, [1, 1, 0, 0]) + assert np.allclose(p, [0.5, 0.5, 0, 0]) + @pytest.mark.skipif( + not hasattr(np, "complex256"), reason="Numpy only defines complex256 in Linux-like system" + ) def test_probs_dtype_error(self, dev): """Test if probs raise error with complex256""" - dev._state = np.array([1, 0]).astype(np.complex256) + dev._state = np.array([1, 0, 0, 0]).astype(np.complex256) with pytest.raises(TypeError, match="Unsupported complex Type:"): dev.probability(wires=[0, 1]) @@ -179,6 +185,9 @@ def test_expval_dtype64(self, dev): assert dev._state.dtype == np.complex64 assert np.allclose(e, 0.0) + @pytest.mark.skipif( + not hasattr(np, "complex256"), reason="Numpy only defines complex256 in Linux-like system" + ) def test_expval_dtype_error(self, dev): """Test if expval raise error with complex256""" dev._state = np.array([1, 0]).astype(np.complex256) @@ -296,6 +305,9 @@ def test_var_dtype64(self, dev): assert dev._state.dtype == np.complex64 assert np.allclose(v, 1.0) + @pytest.mark.skipif( + not hasattr(np, "complex256"), reason="Numpy only defines complex256 in Linux-like system" + ) def 
test_expval_dtype_error(self, dev): """Test if var raise error with complex256""" dev._state = np.array([1, 0]).astype(np.complex256) From 1f9590b39b37d0caaeeaf0f6165cdf71d01e742b Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Wed, 9 Mar 2022 19:40:20 -0500 Subject: [PATCH 54/94] Add tests --- .../src/gates/GateImplementationsLM.hpp | 1 + pennylane_lightning/src/tests/CMakeLists.txt | 5 +- .../src/tests/CreateAllWires.cpp | 30 +++ .../src/tests/CreateAllWires.hpp | 92 +++++++++ pennylane_lightning/src/tests/TestKernels.hpp | 2 +- ...est_GateImplementations_CompareKernels.cpp | 186 ++++++++++++++++++ .../src/tests/Test_RuntimeInfo.cpp | 13 ++ pennylane_lightning/src/tests/Test_Util.cpp | 1 + 8 files changed, 328 insertions(+), 2 deletions(-) create mode 100644 pennylane_lightning/src/tests/CreateAllWires.cpp create mode 100644 pennylane_lightning/src/tests/CreateAllWires.hpp create mode 100644 pennylane_lightning/src/tests/Test_GateImplementations_CompareKernels.cpp create mode 100644 pennylane_lightning/src/tests/Test_RuntimeInfo.cpp diff --git a/pennylane_lightning/src/gates/GateImplementationsLM.hpp b/pennylane_lightning/src/gates/GateImplementationsLM.hpp index 9f227862b2..488079992e 100644 --- a/pennylane_lightning/src/gates/GateImplementationsLM.hpp +++ b/pennylane_lightning/src/gates/GateImplementationsLM.hpp @@ -235,6 +235,7 @@ class GateImplementationsLM : public PauliGenerator { static void applyMatrix(std::complex *arr, size_t num_qubits, const std::complex *matrix, const std::vector &wires, bool inverse) { + using Util::Trans; assert(num_qubits >= wires.size()); switch (wires.size()) { diff --git a/pennylane_lightning/src/tests/CMakeLists.txt b/pennylane_lightning/src/tests/CMakeLists.txt index c507f938bf..3bd3b713b9 100644 --- a/pennylane_lightning/src/tests/CMakeLists.txt +++ b/pennylane_lightning/src/tests/CMakeLists.txt @@ -68,9 +68,11 @@ endif() add_executable(compile_time_tests compile_time_tests.cpp) target_link_libraries(compile_time_tests 
lightning_gates lightning_utils) -set(TEST_SOURCES Test_AdjDiff.cpp +set(TEST_SOURCES CreateAllWires.cpp + Test_AdjDiff.cpp # Test_Bindings.cpp Test_DynamicDispatcher.cpp + Test_GateImplementations_CompareKernels.cpp Test_GateImplementations_Generator.cpp Test_GateImplementations_Inverse.cpp Test_GateImplementations_Matrix.cpp @@ -80,6 +82,7 @@ set(TEST_SOURCES Test_AdjDiff.cpp Test_Internal.cpp Test_Measures.cpp Test_OpToMemberFuncPtr.cpp + Test_RuntimeInfo.cpp Test_StateVectorManaged.cpp Test_StateVectorRaw.cpp Test_Util.cpp diff --git a/pennylane_lightning/src/tests/CreateAllWires.cpp b/pennylane_lightning/src/tests/CreateAllWires.cpp new file mode 100644 index 0000000000..dd0194a625 --- /dev/null +++ b/pennylane_lightning/src/tests/CreateAllWires.cpp @@ -0,0 +1,30 @@ +#include "CreateAllWires.hpp" +namespace Pennylane { +auto crateAllWires(size_t n_qubits, Gates::GateOperation gate_op, bool order) + -> std::vector> { + if (Util::array_has_elt(Gates::Constant::multi_qubit_gates, gate_op)) { + // make all possible 2^N permutations + std::vector> res; + res.reserve((1U << n_qubits) - 1); + ; + for (size_t k = 1; k < (static_cast(1U) << n_qubits); k++) { + std::vector wires; + wires.reserve(Util::popcount(k)); + + for (size_t i = 0; i < n_qubits; i++) { + if (((k >> i) & 1U) == 1U) { + wires.emplace_back(i); + } + } + + res.push_back(wires); + } + return res; + } // else + const size_t n_wires = Util::lookup(Gates::Constant::gate_wires, gate_op); + if (order) { + return PermutationGenerator(n_qubits, n_wires).all_perms(); + } // else + return CombinationGenerator(n_qubits, n_wires).all_perms(); +} +} // namespace Pennylane diff --git a/pennylane_lightning/src/tests/CreateAllWires.hpp b/pennylane_lightning/src/tests/CreateAllWires.hpp new file mode 100644 index 0000000000..54d3cd9e9a --- /dev/null +++ b/pennylane_lightning/src/tests/CreateAllWires.hpp @@ -0,0 +1,92 @@ +#pragma once +#include "BitUtil.hpp" +#include "Constant.hpp" +#include "ConstantUtil.hpp" 
+#include "GateOperation.hpp" + +#include +#include + +namespace Pennylane { + +class WiresGenerator { + public: + [[nodiscard]] virtual auto all_perms() const + -> const std::vector> & = 0; +}; +class CombinationGenerator : public WiresGenerator { + private: + std::vector v_; + std::vector> all_perms_; + + public: + void comb(size_t n, size_t r) { + if (r == 0) { + all_perms_.push_back(v_); + return; + } + if (n < r) { + return; + } + + v_[r - 1] = n - 1; + comb(n - 1, r - 1); + + comb(n - 1, r); + } + + CombinationGenerator(size_t n, size_t r) { + v_.resize(r); + comb(n, r); + } + + [[nodiscard]] auto all_perms() const + -> const std::vector> & override { + return all_perms_; + } +}; +class PermutationGenerator : public WiresGenerator { + private: + std::vector> all_perms_; + std::vector available_elts_; + std::vector v; + + public: + void perm(size_t n, size_t r) { + if (r == 0) { + all_perms_.push_back(v); + return; + } + for (size_t i = 0; i < n; i++) { + v[r - 1] = available_elts_[i]; + std::swap(available_elts_[n - 1], available_elts_[i]); + perm(n - 1, r - 1); + std::swap(available_elts_[n - 1], available_elts_[i]); + } + } + + PermutationGenerator(size_t n, size_t r) { + v.resize(r); + + available_elts_.resize(n); + std::iota(available_elts_.begin(), available_elts_.end(), 0); + perm(n, r); + } + + [[nodiscard]] auto all_perms() const + -> const std::vector> & override { + return all_perms_; + } +}; + +/** + * @brief Create all possible combination of wires + * for a given number of qubits and gate operation + * + * @param n_qubits Number of qubits + * @param gate_op Gate operation + * @param order Whether the ordering matters (if true, permutation is used) + */ +auto crateAllWires(size_t n_qubits, Gates::GateOperation gate_op, bool order) + -> std::vector>; +} // namespace Pennylane diff --git a/pennylane_lightning/src/tests/TestKernels.hpp b/pennylane_lightning/src/tests/TestKernels.hpp index e9b9cfa785..74085a4d75 100644 --- 
a/pennylane_lightning/src/tests/TestKernels.hpp +++ b/pennylane_lightning/src/tests/TestKernels.hpp @@ -10,4 +10,4 @@ using TestKernels = Pennylane::Util::TypeList; + Pennylane::Gates::GateImplementationsPI, void>; diff --git a/pennylane_lightning/src/tests/Test_GateImplementations_CompareKernels.cpp b/pennylane_lightning/src/tests/Test_GateImplementations_CompareKernels.cpp new file mode 100644 index 0000000000..c66f07e522 --- /dev/null +++ b/pennylane_lightning/src/tests/Test_GateImplementations_CompareKernels.cpp @@ -0,0 +1,186 @@ +#include "CreateAllWires.hpp" +#include "TestHelpers.hpp" + +#include "OpToMemberFuncPtr.hpp" +#include "TestKernels.hpp" +#include "Util.hpp" + +#include + +#include +#include +#include +#include +#include +#include +#include + +/** + * @file Test_GateImplementations_Nonparam.cpp + * + * This file tests all gate operations (besides matrix) by comparing results + * between different kernels (gate implementations). + */ +using namespace Pennylane; +using namespace Pennylane::Gates; +using namespace Pennylane::Util; + +namespace { +using namespace Pennylane::Gates::Constant; +} // namespace + +using std::vector; + +template std::string kernelsToString() { + if constexpr (!std::is_same_v) { + return std::string(TypeList::Type::name) + ", " + + kernelsToString(); + } + return ""; +} + +/* Type transformation */ +template +struct KernelsImplementingGateHelper { + using Type = std::conditional_t< + array_has_elt(TypeList::Type::implemented_gates, gate_op), + typename PrependToTypeList< + typename TypeList::Type, + typename KernelsImplementingGateHelper< + gate_op, typename TypeList::Next>::Type>::Type, + typename KernelsImplementingGateHelper::Type>; +}; +template +struct KernelsImplementingGateHelper { + using Type = void; +}; + +template struct KernelsImplementingGate { + using Type = + typename KernelsImplementingGateHelper::Type; +}; + +/** + * @brief Apply the given gate operation with the given gate implementation. 
+ * + * @tparam gate_op Gate operation to test + * @tparam PrecisionT Floating point data type for statevector + * @tparam ParamT Floating point data type for parameter + * @tparam GateImplementation Gate implementation class + * @param ini Initial statevector + * @param num_qubits Number of qubits + * @param wires Wires the gate applies to + * @param inverse Whether to use inverse of gate + * @param params Paramters for gate + */ +template +auto applyGate(std::vector, Alloc> ini, + size_t num_qubits, const std::vector &wires, + bool inverse, const std::vector ¶ms) + -> std::vector, Alloc> { + callGateOps(GateOpToMemberFuncPtr::value, + ini.data(), num_qubits, wires, inverse, params); + return ini; +} + +/** + * @brief Apply the given gate using all implementing kernels and return + * results in tuple. + */ +template +auto applyGateForImplemetingKernels( + const std::vector, Alloc> &ini, size_t num_qubits, + const std::vector &wires, bool inverse, + const std::vector ¶ms, + [[maybe_unused]] std::index_sequence dummy) { + return std::make_tuple( + applyGate>( + ini, num_qubits, wires, inverse, params)...); +} + +template +void testApplyGate(RandomEngine &re, size_t num_qubits) { + const auto ini = createRandomState(re, num_qubits); + + using Kernels = typename KernelsImplementingGate::Type; + + INFO("Kernels implementing " << lookup(gate_names, gate_op) << " are " + << kernelsToString()); + + INFO("PrecisionT, ParamT = " << PrecisionToName::value << ", " + << PrecisionToName::value); + + const auto all_wires = crateAllWires(num_qubits, gate_op, true); + for (const auto &wires : all_wires) { + const auto params = createParams(gate_op); + const auto gate_name = lookup(gate_names, gate_op); + DYNAMIC_SECTION( + "Test gate " + << gate_name + << " with inverse = false") { // Test with inverse = false + const auto results = Util::tuple_to_array( + applyGateForImplemetingKernels( + ini, num_qubits, wires, false, params, + std::make_index_sequence()>())); + + for (size_t i 
= 0; i < results.size() - 1; i++) { + REQUIRE(results[i] == + PLApprox(results[i + 1]) + .margin(static_cast(1e-5))); + } + } + + DYNAMIC_SECTION("Test gate " + << gate_name + << " with inverse = true") { // Test with inverse = true + const auto results = Util::tuple_to_array( + applyGateForImplemetingKernels( + ini, num_qubits, wires, true, params, + std::make_index_sequence()>())); + + for (size_t i = 0; i < results.size() - 1; i++) { + REQUIRE(results[i] == + PLApprox(results[i + 1]) + .margin(static_cast(1e-5))); + } + } + } +} + +template +void testAllGatesIter(RandomEngine &re, size_t max_num_qubits) { + if constexpr (gate_idx < static_cast(GateOperation::END)) { + constexpr static auto gate_op = static_cast(gate_idx); + + if constexpr (gate_op != GateOperation::Matrix) { + size_t min_num_qubits = array_has_elt(multi_qubit_gates, gate_op) + ? 1 + : lookup(gate_wires, gate_op); + for (size_t num_qubits = min_num_qubits; + num_qubits < max_num_qubits; num_qubits++) { + testApplyGate(re, num_qubits); + } + testAllGatesIter(re, + max_num_qubits); + } + } +} + +template +void testAllGates(RandomEngine &re, size_t max_num_qubits) { + testAllGatesIter<0, PrecisionT, ParamT>(re, max_num_qubits); +} + +TEMPLATE_TEST_CASE("Test all kernels give the same results", + "[Test_GateImplementations_CompareKernels]", float, double) { + std::mt19937 re{1337}; + testAllGates(re, 6); +} diff --git a/pennylane_lightning/src/tests/Test_RuntimeInfo.cpp b/pennylane_lightning/src/tests/Test_RuntimeInfo.cpp new file mode 100644 index 0000000000..93823e386b --- /dev/null +++ b/pennylane_lightning/src/tests/Test_RuntimeInfo.cpp @@ -0,0 +1,13 @@ +#include "Macros.hpp" +#include "RuntimeInfo.hpp" + +#include + +using namespace Pennylane::Util; + +TEST_CASE("Runtime information is correct", "[Test_RuntimeInfo]") { + INFO("RuntimeInfo::AVX " << RuntimeInfo::AVX()); + INFO("RuntimeInfo::AVX2 " << RuntimeInfo::AVX2()); + INFO("RuntimeInfo::AVX512F " << RuntimeInfo::AVX512F()); + REQUIRE(true); 
+} diff --git a/pennylane_lightning/src/tests/Test_Util.cpp b/pennylane_lightning/src/tests/Test_Util.cpp index 4360a793f5..c07da227c9 100644 --- a/pennylane_lightning/src/tests/Test_Util.cpp +++ b/pennylane_lightning/src/tests/Test_Util.cpp @@ -60,6 +60,7 @@ TEMPLATE_TEST_CASE("Constant values", "[Util]", float, double) { // NOLINTNEXTLINE: Avoid complexity errors TEMPLATE_TEST_CASE("Utility math functions", "[Util][LinearAlgebra]", float, double) { + using Util::Trans; SECTION("exp2: 2^n") { for (size_t i = 0; i < 10; i++) { CHECK(Util::exp2(i) == static_cast(std::pow(2, i))); From 66ceab58a49452e119a2efb9d7d72d272028420d Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Wed, 9 Mar 2022 19:55:59 -0500 Subject: [PATCH 55/94] Add runtime/compile info in binary --- cmake/process_options.cmake | 14 +++++ pennylane_lightning/src/bindings/Bindings.cpp | 13 ++++- pennylane_lightning/src/bindings/Bindings.hpp | 55 +++++++++++++++++++ tests/test_binary_info.py | 33 +++++++++++ 4 files changed, 112 insertions(+), 3 deletions(-) create mode 100644 tests/test_binary_info.py diff --git a/cmake/process_options.cmake b/cmake/process_options.cmake index d3ecccd3f9..815a04e43d 100644 --- a/cmake/process_options.cmake +++ b/cmake/process_options.cmake @@ -62,6 +62,20 @@ else() message(STATUS "ENABLE_AVX is OFF") endif() +if(ENABLE_AVX2) + message(STATUS "ENABLE_AVX2 is ON.") + target_compile_options(lightning_compile_options INTERFACE -mavx2) +else() + message(STATUS "ENABLE_AVX2 is OFF") +endif() + +if(ENABLE_AVX512) + message(STATUS "ENABLE_AVX512 is ON.") + target_compile_options(lightning_compile_options INTERFACE -mavx512f) # Now we only use avx512f +else() + message(STATUS "ENABLE_AVX512 is OFF") +endif() + if(ENABLE_OPENMP) message(STATUS "ENABLE_OPENMP is ON.") find_package(OpenMP) diff --git a/pennylane_lightning/src/bindings/Bindings.cpp b/pennylane_lightning/src/bindings/Bindings.cpp index eb1e73daf8..73b98f8c55 100644 --- 
a/pennylane_lightning/src/bindings/Bindings.cpp +++ b/pennylane_lightning/src/bindings/Bindings.cpp @@ -364,6 +364,12 @@ PYBIND11_MODULE(lightning_qubit_ops, // NOLINT: No control over Pybind internals &Gates::getIndicesAfterExclusion), "Get statevector indices for gate application"); + /* Add compile info */ + m.def("compile_info", &getCompileInfo, "Compiled binary information."); + + /* Add compile info */ + m.def("runtime_info", &getRuntimeInfo, "Runtime information."); + /* Add EXPORTED_KERNELS */ std::vector> exported_kernel_ops; @@ -372,7 +378,7 @@ PYBIND11_MODULE(lightning_qubit_ops, // NOLINT: No control over Pybind internals const auto implemented_gates = implementedGatesForKernel(kernel); for (const auto gate_op : implemented_gates) { const auto gate_name = - std::string(lookup(Constant::gate_names, gate_op)); + std::string(lookup(Gates::Constant::gate_names, gate_op)); exported_kernel_ops.emplace_back(kernel_name, gate_name); } } @@ -381,8 +387,9 @@ PYBIND11_MODULE(lightning_qubit_ops, // NOLINT: No control over Pybind internals /* Add DEFAULT_KERNEL_FOR_OPS */ std::map default_kernel_ops_map; - for (const auto &[gate_op, name] : Constant::gate_names) { - const auto kernel = lookup(Constant::default_kernel_for_gates, gate_op); + for (const auto &[gate_op, name] : Gates::Constant::gate_names) { + const auto kernel = + lookup(Gates::Constant::default_kernel_for_gates, gate_op); const auto kernel_name = Util::lookup(kernel_id_name_pairs, kernel); default_kernel_ops_map.emplace(std::string(name), kernel_name); } diff --git a/pennylane_lightning/src/bindings/Bindings.hpp b/pennylane_lightning/src/bindings/Bindings.hpp index 5d79774ffd..d247144a8a 100644 --- a/pennylane_lightning/src/bindings/Bindings.hpp +++ b/pennylane_lightning/src/bindings/Bindings.hpp @@ -19,8 +19,10 @@ #pragma once #include "AdjointDiff.hpp" #include "JacobianProd.hpp" +#include "Macros.hpp" #include "Measures.hpp" #include "OpToMemberFuncPtr.hpp" +#include "RuntimeInfo.hpp" #include 
"StateVectorRaw.hpp" #include "pybind11/complex.h" @@ -227,4 +229,57 @@ template void registerKernelsToPyexport(PyClass &pyclass) { registerKernelsToPyexportIter(pyclass); } + +/** + * @brief Return basic information of the compiled binary. + */ +auto getCompileInfo() -> pybind11::dict { + using namespace Util::Constant; + using namespace pybind11::literals; + + const std::string_view cpu_arch_str = [] { + switch (cpu_arch) { + case CPUArch::AMD64: + return "AMD64"; + case CPUArch::PPC64: + return "PPC64"; + case CPUArch::ARM: + return "ARM"; + default: + return "Unknown"; + } + }(); + + const std::string_view compiler_name_str = [] { + switch (compiler) { + case Compiler::GCC: + return "GCC"; + case Compiler::Clang: + return "Clang"; + case Compiler::MSVC: + return "MSVC"; + default: + return "Unknown"; + } + }(); + + const auto compiler_version_str = getCompilerVersion(); + + return pybind11::dict("cpu.arch"_a = cpu_arch_str, + "compiler.name"_a = compiler_name_str, + "compiler.version"_a = compiler_version_str, + "AVX2"_a = use_avx2, "AVX512F"_a = use_avx512f); +} + +/** + * @brief Return basic information of runtime environment + */ +auto getRuntimeInfo() -> pybind11::dict { + using namespace Util::Constant; + using namespace pybind11::literals; + + return pybind11::dict("AVX"_a = RuntimeInfo::AVX(), + "AVX2"_a = RuntimeInfo::AVX2(), + "AVX512F"_a = RuntimeInfo::AVX512F()); +} } // namespace Pennylane diff --git a/tests/test_binary_info.py b/tests/test_binary_info.py new file mode 100644 index 0000000000..7fe6ff5b72 --- /dev/null +++ b/tests/test_binary_info.py @@ -0,0 +1,33 @@ +# Copyright 2018-2020 Xanadu Quantum Technologies Inc. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Test binary information of ``lightning.qubit``. +""" + +try: + from pennylane_lightning.lightning_qubit_ops import runtime_info, compile_info +except (ImportError, ModuleNotFoundError): + pytest.skip("No binary module found. Skipping.", allow_module_level=True) + + +def test_runtime_info(): + m = runtime_info() + for key in ["AVX", "AVX2", "AVX512F"]: + assert key in m + + +def test_compile_info(): + m = compile_info() + for key in ["cpu.arch", "compiler.name", "compiler.version", "AVX2", "AVX512F"]: + assert key in m From d237ceb39ab35cb7011c211747e52196c803ba9b Mon Sep 17 00:00:00 2001 From: Dev version update bot Date: Thu, 10 Mar 2022 00:57:00 +0000 Subject: [PATCH 56/94] Auto update version --- pennylane_lightning/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pennylane_lightning/_version.py b/pennylane_lightning/_version.py index fd08943a9b..7b13995518 100644 --- a/pennylane_lightning/_version.py +++ b/pennylane_lightning/_version.py @@ -16,4 +16,4 @@ Version number (major.minor.patch[-label]) """ -__version__ = "0.22.0-dev16" +__version__ = "0.22.0-dev17" From 386edb609c7065e03ef61476494d6404aa30497f Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Wed, 9 Mar 2022 19:58:26 -0500 Subject: [PATCH 57/94] Fix for non-biary --- tests/test_binary_info.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_binary_info.py b/tests/test_binary_info.py index 7fe6ff5b72..dfa207436b 100644 --- a/tests/test_binary_info.py +++ b/tests/test_binary_info.py @@ -14,6 +14,7 @@ """ Test binary information of 
``lightning.qubit``. """ +import pytest try: from pennylane_lightning.lightning_qubit_ops import runtime_info, compile_info @@ -26,7 +27,6 @@ def test_runtime_info(): for key in ["AVX", "AVX2", "AVX512F"]: assert key in m - def test_compile_info(): m = compile_info() for key in ["cpu.arch", "compiler.name", "compiler.version", "AVX2", "AVX512F"]: From cd9027f08d020d9506992d9aa52284e7db4023a7 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Wed, 9 Mar 2022 20:00:34 -0500 Subject: [PATCH 58/94] Format --- tests/test_binary_info.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_binary_info.py b/tests/test_binary_info.py index dfa207436b..d0035ac61e 100644 --- a/tests/test_binary_info.py +++ b/tests/test_binary_info.py @@ -27,6 +27,7 @@ def test_runtime_info(): for key in ["AVX", "AVX2", "AVX512F"]: assert key in m + def test_compile_info(): m = compile_info() for key in ["cpu.arch", "compiler.name", "compiler.version", "AVX2", "AVX512F"]: From da3d9ec097ac37c0a5a3d6742bc7f1be8d66f922 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Wed, 9 Mar 2022 20:08:49 -0500 Subject: [PATCH 59/94] Update tidy --- pennylane_lightning/src/.clang-tidy | 6 ++---- pennylane_lightning/src/CMakeLists.txt | 3 ++- pennylane_lightning/src/tests/.clang-tidy | 4 ++-- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/pennylane_lightning/src/.clang-tidy b/pennylane_lightning/src/.clang-tidy index f015b16a1d..50b924d24b 100644 --- a/pennylane_lightning/src/.clang-tidy +++ b/pennylane_lightning/src/.clang-tidy @@ -1,5 +1,5 @@ --- -Checks: 'clang-diagnostic-*,clang-analyzer-*,-*,-llvmlibc-*,modernize-*,-modernize-use-trailing-return-type,clang-analyzer-cplusplus*,openmp-*,performance-*,portability-*,readability-*,hicpp-*,-hicpp-no-array-decay,bugprone-suspicious-*,llvm-namespace-comment,' +Checks: 
'-*,clang-diagnostic-*,clang-analyzer-*,modernize-*,-modernize-use-trailing-return-type,clang-analyzer-cplusplus*,openmp-*,performance-*,portability-*,readability-*,hicpp-*,-hicpp-avoid-c-arrays,-hicpp-no-array-decay,bugprone-suspicious-*,llvm-namespace-comment,cppcoreguidelines-avoid-non-const-global-variables,cppcoreguidelines-slicing,cppcoreguidelines-special-member-functions' WarningsAsErrors: '*' HeaderFilterRegex: '.*' AnalyzeTemporaryDtors: false @@ -25,8 +25,6 @@ CheckOptions: value: 'false' - key: readability-magic-numbers.IgnoredIntegerValues value: '1;2;3;4;' - - key: readability-magic-numbers.IgnorePowersOf2IntegerValues - value: true - key: modernize-use-default-member-init.UseAssignment value: 'false' - key: readability-function-size.NestingThreshold @@ -218,7 +216,7 @@ CheckOptions: - key: modernize-use-auto.RemoveStars value: 'false' - key: readability-magic-numbers.IgnorePowersOf2IntegerValues - value: 'false' + value: 'true' - key: portability-simd-intrinsics.Std value: '' - key: readability-redundant-member-init.IgnoreBaseInCopyConstructors diff --git a/pennylane_lightning/src/CMakeLists.txt b/pennylane_lightning/src/CMakeLists.txt index b6776ac992..0385f947d6 100644 --- a/pennylane_lightning/src/CMakeLists.txt +++ b/pennylane_lightning/src/CMakeLists.txt @@ -11,8 +11,9 @@ if(ENABLE_CLANG_TIDY) if(NOT DEFINED CLANG_TIDY_BINARY) set(CLANG_TIDY_BINARY clang-tidy) endif() + message(STATUS "Using CLANG_TIDY_BINARY=${CLANG_TIDY_BINARY}") set(CMAKE_CXX_CLANG_TIDY ${CLANG_TIDY_BINARY}; - -extra-arg=-std=c++17; + -extra-arg=-std=c++17; ) endif() diff --git a/pennylane_lightning/src/tests/.clang-tidy b/pennylane_lightning/src/tests/.clang-tidy index 3ed93f21bf..75afabace1 100644 --- a/pennylane_lightning/src/tests/.clang-tidy +++ b/pennylane_lightning/src/tests/.clang-tidy @@ -1,5 +1,5 @@ --- -Checks: 
'clang-diagnostic-*,clang-analyzer-*,-*,-llvmlibc-*,modernize-*,-modernize-use-trailing-return-type,clang-analyzer-cplusplus*,openmp-*,performance-*,portability-*,readability-*,-readability-magic-numbers,-modernize-avoid-c-arrays' +Checks: '-*,clang-diagnostic-*,clang-analyzer-*,-llvmlibc-*,modernize-*,-modernize-use-trailing-return-type,clang-analyzer-cplusplus*,openmp-*,performance-*,portability-*,readability-*,-modernize-avoid-c-arrays,-readability-magic-numbers,hicpp-*,-hicpp-no-array-decay,-hicpp-avoid-c-arrays,bugprone-suspicious-*,llvm-namespace-comment,cppcoreguidelines-slicing,cppcoreguidelines-special-member-functions' WarningsAsErrors: '*' HeaderFilterRegex: '.*' AnalyzeTemporaryDtors: false @@ -216,7 +216,7 @@ CheckOptions: - key: modernize-use-auto.RemoveStars value: 'false' - key: readability-magic-numbers.IgnorePowersOf2IntegerValues - value: 'false' + value: 'true' - key: portability-simd-intrinsics.Std value: '' - key: readability-redundant-member-init.IgnoreBaseInCopyConstructors From 6f7d770a40e5e8b45e729698e5f360cbd868100a Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Wed, 9 Mar 2022 20:36:23 -0500 Subject: [PATCH 60/94] Fix for tidy --- .../src/gates/GateImplementationsLM.hpp | 4 ++-- .../src/simulator/DynamicDispatcher.hpp | 4 ++-- .../src/simulator/Measures.hpp | 2 +- pennylane_lightning/src/tests/TestHelpers.hpp | 2 +- .../src/tests/Test_AdjDiff.cpp | 16 +++++++------- .../Test_GateImplementations_Generator.cpp | 2 -- .../Test_GateImplementations_Nonparam.cpp | 10 +++------ .../tests/Test_GateImplementations_Param.cpp | 12 +++++----- .../src/tests/Test_OpToMemberFuncPtr.cpp | 1 + .../src/tests/Test_StateVectorRaw.cpp | 2 +- pennylane_lightning/src/tests/Test_Util.cpp | 6 ++--- .../src/tests/Test_VectorJacobianProduct.cpp | 22 +++++++++---------- pennylane_lightning/src/util/BitUtil.hpp | 5 +++-- 13 files changed, 42 insertions(+), 46 deletions(-) diff --git a/pennylane_lightning/src/gates/GateImplementationsLM.hpp 
b/pennylane_lightning/src/gates/GateImplementationsLM.hpp index 488079992e..a985e0e373 100644 --- a/pennylane_lightning/src/gates/GateImplementationsLM.hpp +++ b/pennylane_lightning/src/gates/GateImplementationsLM.hpp @@ -258,8 +258,8 @@ class GateImplementationsLM : public PauliGenerator { size_t idx = k | inner_idx; size_t n_wires = wires.size(); for (size_t pos = 0; pos < n_wires; pos++) { - bitswap(idx, n_wires - pos - 1, - num_qubits - wires[pos] - 1); + idx = bitswap(idx, n_wires - pos - 1, + num_qubits - wires[pos] - 1); } indices[inner_idx] = idx; coeffs_in[inner_idx] = arr[idx]; diff --git a/pennylane_lightning/src/simulator/DynamicDispatcher.hpp b/pennylane_lightning/src/simulator/DynamicDispatcher.hpp index 83536f9076..469873751f 100644 --- a/pennylane_lightning/src/simulator/DynamicDispatcher.hpp +++ b/pennylane_lightning/src/simulator/DynamicDispatcher.hpp @@ -60,12 +60,12 @@ namespace Pennylane { template struct registerBeforeMain; template <> struct registerBeforeMain { - static inline int dummy = + const static inline int dummy = Internal::registerAllAvailableKernels(); }; template <> struct registerBeforeMain { - static inline int dummy = + const static inline int dummy = Internal::registerAllAvailableKernels(); }; diff --git a/pennylane_lightning/src/simulator/Measures.hpp b/pennylane_lightning/src/simulator/Measures.hpp index 26208b6ba1..9e45453067 100644 --- a/pennylane_lightning/src/simulator/Measures.hpp +++ b/pennylane_lightning/src/simulator/Measures.hpp @@ -46,7 +46,7 @@ class Measures { using CFP_t = std::complex; public: - Measures(const SVType &provided_statevector) + explicit Measures(const SVType &provided_statevector) : original_statevector{provided_statevector} {}; /** diff --git a/pennylane_lightning/src/tests/TestHelpers.hpp b/pennylane_lightning/src/tests/TestHelpers.hpp index 74faeeb5ce..655a43079d 100644 --- a/pennylane_lightning/src/tests/TestHelpers.hpp +++ b/pennylane_lightning/src/tests/TestHelpers.hpp @@ -110,7 +110,7 @@ 
isApproxEqual(const std::vector &data1, const typename Data_t::value_type eps = std::numeric_limits::epsilon() * 100) { - return data1 == PLApprox(data2); + return data1 == PLApprox(data2).epsilon(eps); } /** diff --git a/pennylane_lightning/src/tests/Test_AdjDiff.cpp b/pennylane_lightning/src/tests/Test_AdjDiff.cpp index dfbac67214..2ca9e3213b 100644 --- a/pennylane_lightning/src/tests/Test_AdjDiff.cpp +++ b/pennylane_lightning/src/tests/Test_AdjDiff.cpp @@ -50,7 +50,7 @@ TEST_CASE("AdjointJacobian::adjointJacobian Op=RX, Obs=Z", for (const auto &p : param) { auto ops = OpsData({"RX"}, {{p}}, {{0}}, {false}); - std::vector> cdata(0b1 << num_qubits); + std::vector> cdata(1U << num_qubits); cdata[0] = std::complex{1, 0}; StateVectorRaw psi(cdata.data(), cdata.size()); @@ -82,7 +82,7 @@ TEST_CASE("AdjointJacobian::adjointJacobian Op=RY, Obs=X", for (const auto &p : param) { auto ops = OpsData({"RY"}, {{p}}, {{0}}, {false}); - std::vector> cdata(0b1 << num_qubits); + std::vector> cdata(1U << num_qubits); cdata[0] = std::complex{1, 0}; StateVectorRaw psi(cdata.data(), cdata.size()); @@ -109,7 +109,7 @@ TEST_CASE("AdjointJacobian::adjointJacobian Op=RX, Obs=[Z,Z]", const size_t num_obs = 2; std::vector jacobian(num_obs * num_params, 0); - std::vector> cdata(0b1 << num_qubits); + std::vector> cdata(1U << num_qubits); StateVectorRaw psi(cdata.data(), cdata.size()); cdata[0] = std::complex{1, 0}; @@ -140,7 +140,7 @@ TEST_CASE("AdjointJacobian::adjointJacobian Op=[RX,RX,RX], Obs=[Z,Z,Z]", const size_t num_obs = 3; std::vector jacobian(num_obs * num_params, 0); - std::vector> cdata(0b1 << num_qubits); + std::vector> cdata(1U << num_qubits); StateVectorRaw psi(cdata.data(), cdata.size()); cdata[0] = std::complex{1, 0}; @@ -179,7 +179,7 @@ TEST_CASE("AdjointJacobian::adjointJacobian Op=[RX,RX,RX], Obs=[Z,Z,Z], " std::vector jacobian(num_obs * num_params, 0); std::vector t_params{0, 2}; - std::vector> cdata(0b1 << num_qubits); + std::vector> cdata(1U << num_qubits); 
StateVectorRaw psi(cdata.data(), cdata.size()); cdata[0] = std::complex{1, 0}; @@ -214,7 +214,7 @@ TEST_CASE("AdjointJacobian::adjointJacobian Op=[RX,RX,RX], Obs=[ZZZ]", const size_t num_obs = 1; std::vector jacobian(num_obs * num_params, 0); - std::vector> cdata(0b1 << num_qubits); + std::vector> cdata(1U << num_qubits); StateVectorRaw psi(cdata.data(), cdata.size()); cdata[0] = std::complex{1, 0}; @@ -249,7 +249,7 @@ TEST_CASE("AdjointJacobian::adjointJacobian Op=Mixed, Obs=[XXX]", const size_t num_obs = 1; std::vector jacobian(num_obs * num_params, 0); - std::vector> cdata(0b1 << num_qubits); + std::vector> cdata(1U << num_qubits); StateVectorRaw psi(cdata.data(), cdata.size()); cdata[0] = std::complex{1, 0}; @@ -388,4 +388,4 @@ TEST_CASE("AdjointJacobian::adjointJacobian Mixed Ops, Obs and TParams", CHECK(expected[1] == Approx(jacobian[1])); CHECK(expected[2] == Approx(jacobian[2])); } -} \ No newline at end of file +} diff --git a/pennylane_lightning/src/tests/Test_GateImplementations_Generator.cpp b/pennylane_lightning/src/tests/Test_GateImplementations_Generator.cpp index 377c45bd5f..fd045742e9 100644 --- a/pennylane_lightning/src/tests/Test_GateImplementations_Generator.cpp +++ b/pennylane_lightning/src/tests/Test_GateImplementations_Generator.cpp @@ -35,8 +35,6 @@ constexpr std::string_view remove_prefix(const std::string_view &str, return {str.data() + len, str.length() - len}; } -constexpr auto gate_name_to_ops = Util::reverse_pairs(Constant::gate_names); - template constexpr auto findGateOpForGenerator() -> GateOperation { constexpr auto gntr_name = diff --git a/pennylane_lightning/src/tests/Test_GateImplementations_Nonparam.cpp b/pennylane_lightning/src/tests/Test_GateImplementations_Nonparam.cpp index 86894a47f7..49eb353529 100644 --- a/pennylane_lightning/src/tests/Test_GateImplementations_Nonparam.cpp +++ b/pennylane_lightning/src/tests/Test_GateImplementations_Nonparam.cpp @@ -21,10 +21,6 @@ */ using namespace Pennylane; -namespace { -using 
std::vector; -} - /** * @brief Run test suit only when the gate is defined */ @@ -77,7 +73,7 @@ void testApplyPauliX() { GateImplementation::applyPauliX(st.data(), num_qubits, {index}, false); CHECK(st[0] == Util::ZERO()); - CHECK(st[0b1 << (num_qubits - index - 1)] == Util::ONE()); + CHECK(st[1U << (num_qubits - index - 1)] == Util::ONE()); } } PENNYLANE_RUN_TEST(PauliX); @@ -147,9 +143,9 @@ void testApplyHadamard() { CHECK(expected.imag() == Approx(st[0].imag())); CHECK(expected.real() == - Approx(st[0b1 << (num_qubits - index - 1)].real())); + Approx(st[1U << (num_qubits - index - 1)].real())); CHECK(expected.imag() == - Approx(st[0b1 << (num_qubits - index - 1)].imag())); + Approx(st[1U << (num_qubits - index - 1)].imag())); } } PENNYLANE_RUN_TEST(Hadamard); diff --git a/pennylane_lightning/src/tests/Test_GateImplementations_Param.cpp b/pennylane_lightning/src/tests/Test_GateImplementations_Param.cpp index cc6f687e11..894038c514 100644 --- a/pennylane_lightning/src/tests/Test_GateImplementations_Param.cpp +++ b/pennylane_lightning/src/tests/Test_GateImplementations_Param.cpp @@ -237,15 +237,15 @@ void testApplyRot() { std::vector{2.3, 0.1, 0.4}}; std::vector> expected_results{ - std::vector(0b1 << num_qubits), - std::vector(0b1 << num_qubits), - std::vector(0b1 << num_qubits)}; + std::vector(1U << num_qubits), + std::vector(1U << num_qubits), + std::vector(1U << num_qubits)}; for (size_t i = 0; i < angles.size(); i++) { const auto rot_mat = Gates::getRot(angles[i][0], angles[i][1], angles[i][2]); expected_results[i][0] = rot_mat[0]; - expected_results[i][0b1 << (num_qubits - i - 1)] = rot_mat[2]; + expected_results[i][1U << (num_qubits - i - 1)] = rot_mat[2]; } for (size_t index = 0; index < num_qubits; index++) { @@ -1225,8 +1225,8 @@ void testApplyCRot() { std::vector expected_results(8); const auto rot_mat = Gates::getRot(angles[0], angles[1], angles[2]); - expected_results[0b1 << (num_qubits - 1)] = rot_mat[0]; - expected_results[(0b1 << num_qubits) - 2] = 
rot_mat[2]; + expected_results[1U << (num_qubits - 1)] = rot_mat[0]; + expected_results[(1U << num_qubits) - 2] = rot_mat[2]; DYNAMIC_SECTION(GateImplementation::name << ", CRot0,1 |000> -> |000> - " diff --git a/pennylane_lightning/src/tests/Test_OpToMemberFuncPtr.cpp b/pennylane_lightning/src/tests/Test_OpToMemberFuncPtr.cpp index a46a7387f6..81f85038e4 100644 --- a/pennylane_lightning/src/tests/Test_OpToMemberFuncPtr.cpp +++ b/pennylane_lightning/src/tests/Test_OpToMemberFuncPtr.cpp @@ -104,6 +104,7 @@ class DummyImplementation { static_cast(arr); static_cast(num_qubits); static_cast(matrix); + static_cast(wires); static_cast(inverse); } diff --git a/pennylane_lightning/src/tests/Test_StateVectorRaw.cpp b/pennylane_lightning/src/tests/Test_StateVectorRaw.cpp index 4700c74881..e80c076583 100644 --- a/pennylane_lightning/src/tests/Test_StateVectorRaw.cpp +++ b/pennylane_lightning/src/tests/Test_StateVectorRaw.cpp @@ -43,5 +43,5 @@ TEMPLATE_TEST_CASE("StateVectorRaw::setData", "[StateVectorRaw]", float, REQUIRE(sv.getNumQubits() == 8); REQUIRE(sv.getData() == st_data2.data()); - REQUIRE(sv.getLength() == (1U << 8)); + REQUIRE(sv.getLength() == (1U << 8U)); } diff --git a/pennylane_lightning/src/tests/Test_Util.cpp b/pennylane_lightning/src/tests/Test_Util.cpp index c07da227c9..18c0a7c4b9 100644 --- a/pennylane_lightning/src/tests/Test_Util.cpp +++ b/pennylane_lightning/src/tests/Test_Util.cpp @@ -469,7 +469,7 @@ TEMPLATE_TEST_CASE("Utility math functions", "[Util][LinearAlgebra]", float, */ size_t popcount_slow(uint64_t x) { size_t c = 0; - for (; x != 0; x >>= 1) { + for (; x != 0U; x >>= 1U) { if ((x & 1U) != 0U) { c++; } @@ -484,8 +484,8 @@ size_t popcount_slow(uint64_t x) { */ size_t ctz_slow(uint64_t x) { size_t c = 0; - while ((x & 1) == 0) { - x >>= 1; + while ((x & 1U) == 0) { + x >>= 1U; c++; } return c; diff --git a/pennylane_lightning/src/tests/Test_VectorJacobianProduct.cpp b/pennylane_lightning/src/tests/Test_VectorJacobianProduct.cpp index 
babee6b726..8636feb1d9 100644 --- a/pennylane_lightning/src/tests/Test_VectorJacobianProduct.cpp +++ b/pennylane_lightning/src/tests/Test_VectorJacobianProduct.cpp @@ -53,7 +53,7 @@ TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=RX, Obs=Z dy={0}", for (const auto &p : param) { auto ops = OpsData({"RX"}, {{p}}, {{0}}, {false}); - std::vector> cdata(0b1 << num_qubits); + std::vector> cdata(1U << num_qubits); cdata[0] = std::complex{1, 0}; StateVectorRaw psi(cdata.data(), cdata.size()); @@ -91,7 +91,7 @@ TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=RX, Obs=Z dy={1}", for (const auto &p : param) { auto ops = OpsData({"RX"}, {{p}}, {{0}}, {false}); - std::vector> cdata(0b1 << num_qubits); + std::vector> cdata(1U << num_qubits); cdata[0] = std::complex{1, 0}; StateVectorRaw psi(cdata.data(), cdata.size()); @@ -129,7 +129,7 @@ TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=RX, Obs=Z dy={0.4}", for (const auto &p : param) { auto ops = OpsData({"RX"}, {{p}}, {{0}}, {false}); - std::vector> cdata(0b1 << num_qubits); + std::vector> cdata(1U << num_qubits); cdata[0] = std::complex{1, 0}; StateVectorRaw psi(cdata.data(), cdata.size()); @@ -168,7 +168,7 @@ TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=RY, Obs=X dy={0.4}", for (const auto &p : param) { auto ops = OpsData({"RY"}, {{p}}, {{0}}, {false}); - std::vector> cdata(0b1 << num_qubits); + std::vector> cdata(1U << num_qubits); cdata[0] = std::complex{1, 0}; StateVectorRaw psi(cdata.data(), cdata.size()); @@ -203,7 +203,7 @@ TEST_CASE( std::vector vjp_res(num_params); std::vector dy(num_obs, 1); - std::vector> cdata(0b1 << num_qubits); + std::vector> cdata(1U << num_qubits); StateVectorRaw psi(cdata.data(), cdata.size()); cdata[0] = std::complex{1, 0}; @@ -239,7 +239,7 @@ TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=[RX,RX,RX], " std::vector vjp_res(num_params); std::vector dy(num_obs, 0.4); - std::vector> cdata(0b1 << num_qubits); + std::vector> cdata(1U << 
num_qubits); StateVectorRaw psi(cdata.data(), cdata.size()); cdata[0] = std::complex{1, 0}; @@ -282,7 +282,7 @@ TEST_CASE( std::vector vjp_res(num_params); std::vector dy(num_obs, 1); - std::vector> cdata(0b1 << num_qubits); + std::vector> cdata(1U << num_qubits); StateVectorRaw psi(cdata.data(), cdata.size()); cdata[0] = std::complex{1, 0}; @@ -322,7 +322,7 @@ TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=[RX,RX,RX], " std::vector vjp_res(num_params); std::vector dy(num_obs, 0.4); - std::vector> cdata(0b1 << num_qubits); + std::vector> cdata(1U << num_qubits); StateVectorRaw psi(cdata.data(), cdata.size()); cdata[0] = std::complex{1, 0}; @@ -361,7 +361,7 @@ TEST_CASE( std::vector vjp_res(num_params); std::vector dy(num_obs, 1); - std::vector> cdata(0b1 << num_qubits); + std::vector> cdata(1U << num_qubits); StateVectorRaw psi(cdata.data(), cdata.size()); cdata[0] = std::complex{1, 0}; @@ -412,7 +412,7 @@ TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Op=Mixed, Obs=[XXX], " std::vector vjp_res(num_params); std::vector dy(num_obs, -0.2); - std::vector> cdata(0b1 << num_qubits); + std::vector> cdata(1U << num_qubits); StateVectorRaw psi(cdata.data(), cdata.size()); cdata[0] = std::complex{1, 0}; @@ -622,4 +622,4 @@ TEST_CASE("VectorJacobianProduct::vectorJacobianProduct Mixed Ops, Obs and " CHECK(-0.5 * expected[1] == Approx(vjp_res[1]).margin(1e-7)); CHECK(-0.5 * expected[2] == Approx(vjp_res[2]).margin(1e-7)); } -} \ No newline at end of file +} diff --git a/pennylane_lightning/src/util/BitUtil.hpp b/pennylane_lightning/src/util/BitUtil.hpp index 8b7251ddc3..48fe1ddfcf 100644 --- a/pennylane_lightning/src/util/BitUtil.hpp +++ b/pennylane_lightning/src/util/BitUtil.hpp @@ -196,8 +196,9 @@ inline auto constexpr fillLeadingOnes(size_t pos) -> size_t { /** * @brief Swap bits in i-th and j-th position in place */ -inline void constexpr bitswap(size_t bits, const size_t i, const size_t j) { +inline auto constexpr bitswap(size_t bits, const size_t i, 
const size_t j) + -> size_t { size_t x = ((bits >> i) ^ (bits >> j)) & 1U; - bits ^= ((x << i) | (x << j)); + return bits ^ ((x << i) | (x << j)); } } // namespace Pennylane::Util From 16df1251c6f79c3e31f7f49f241fdb42c16f9ff4 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Wed, 9 Mar 2022 20:54:48 -0500 Subject: [PATCH 61/94] Trigger CI From 6908f2086f235630d0681a28476a57dc1a1386a3 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Wed, 9 Mar 2022 21:28:20 -0500 Subject: [PATCH 62/94] Futher update in tests --- pennylane_lightning/src/tests/TestHelpers.hpp | 52 ---------- .../tests/Test_GateImplementations_Matrix.cpp | 1 + .../src/tests/Test_Internal.cpp | 98 ++++++++++++++----- pennylane_lightning/src/tests/Test_Util.cpp | 32 ++++++ 4 files changed, 104 insertions(+), 79 deletions(-) diff --git a/pennylane_lightning/src/tests/TestHelpers.hpp b/pennylane_lightning/src/tests/TestHelpers.hpp index 655a43079d..6831a97b76 100644 --- a/pennylane_lightning/src/tests/TestHelpers.hpp +++ b/pennylane_lightning/src/tests/TestHelpers.hpp @@ -294,58 +294,6 @@ auto createParams(Gates::GateOperation op) -> std::vector { } return {}; } -/** - * @brief Generate random unitary matrix - * - * @return Generated unitary matrix in row-major format - */ -template -auto randomUnitary(RandomEngine &re, size_t num_qubits) - -> std::vector> { - using ComplexPrecisionT = std::complex; - const size_t dim = (1U << num_qubits); - std::vector res(dim * dim, ComplexPrecisionT{}); - - std::normal_distribution dist; - - auto generator = [&dist, &re]() -> ComplexPrecisionT { - return ComplexPrecisionT{dist(re), dist(re)}; - }; - - std::generate(res.begin(), res.end(), generator); - - // Simple algorithm to make rows orthogonal with Gram-Schmidt - // This algorithm is unstable but works for a small matrix. - // Use QR decomposition when we have LAPACK support. 
- - for (size_t row2 = 0; row2 < dim; row2++) { - ComplexPrecisionT *row2_p = res.data() + row2 * dim; - for (size_t row1 = 0; row1 < row2; row1++) { - const ComplexPrecisionT *row1_p = res.data() + row1 * dim; - ComplexPrecisionT dot12 = Util::innerProdC(row1_p, row2_p, dim); - ComplexPrecisionT dot11 = squaredNorm(row1_p, dim); - - // orthogonalize row2 - std::transform( - row2_p, row2_p + dim, row1_p, row2_p, - [scale = dot12 / dot11](auto &elt2, const auto &elt1) { - return elt2 - scale * elt1; - }); - } - } - - // Normalize each row - for (size_t row = 0; row < dim; row++) { - ComplexPrecisionT *row_p = res.data() + row * dim; - PrecisionT norm2 = std::sqrt(squaredNorm(row_p, dim)); - - // noramlize row2 - std::transform(row_p, row_p + dim, row_p, [norm2](const auto c) { - return (static_cast(1.0) / norm2) * c; - }); - } - return res; -} template struct PrecisionToName; diff --git a/pennylane_lightning/src/tests/Test_GateImplementations_Matrix.cpp b/pennylane_lightning/src/tests/Test_GateImplementations_Matrix.cpp index dfda96073f..a49440aa20 100644 --- a/pennylane_lightning/src/tests/Test_GateImplementations_Matrix.cpp +++ b/pennylane_lightning/src/tests/Test_GateImplementations_Matrix.cpp @@ -6,6 +6,7 @@ #include using namespace Pennylane; +using Pennylane::Util::randomUnitary; template using ApplyMatrixType = void (*)(std::complex *, size_t, diff --git a/pennylane_lightning/src/tests/Test_Internal.cpp b/pennylane_lightning/src/tests/Test_Internal.cpp index 00ead21271..33b5fef81a 100644 --- a/pennylane_lightning/src/tests/Test_Internal.cpp +++ b/pennylane_lightning/src/tests/Test_Internal.cpp @@ -1,8 +1,10 @@ +#include "CreateAllWires.hpp" #include "GateImplementationsPI.hpp" #include "TestHelpers.hpp" #include +#include #include /** @@ -83,35 +85,77 @@ TEMPLATE_TEST_CASE("createProductState", "[Test_Internal]", float, double) { } } -/** - * @brief Test randomUnitary is correct - */ -TEMPLATE_TEST_CASE("randomUnitary", "[Test_Internal]", float, double) { - 
using PrecisionT = TestType; - - std::mt19937 re{1337}; - - for (size_t num_qubits = 1; num_qubits <= 5; num_qubits++) { - const size_t dim = (1U << num_qubits); - const auto unitary = randomUnitary(re, num_qubits); - - std::vector> unitary_dagger = - Util::Transpose(unitary, dim, dim); - std::transform( - unitary_dagger.begin(), unitary_dagger.end(), - unitary_dagger.begin(), - [](const std::complex &v) { return std::conj(v); }); +size_t binomialCeff(size_t n, size_t r) { + size_t num = 1; + size_t dem = 1; + for (size_t k = 0; k < r; k++) { + num *= (n - k); + } + for (size_t k = 1; k <= r; k++) { + dem *= k; + } + return num / dem; +} - std::vector> mat(dim * dim); - Util::matrixMatProd(unitary.data(), unitary_dagger.data(), mat.data(), - dim, dim, dim); +size_t permSize(size_t n, size_t r) { + size_t res = 1; + for (size_t k = 0; k < r; k++) { + res *= (n - k); + } + return res; +} - std::vector> identity( - dim * dim, std::complex{}); - for (size_t i = 0; i < dim; i++) { - identity[i * dim + i] = std::complex{1.0, 0.0}; +/** + * @brief Test create all wires + */ +TEST_CASE("createAllWires", "[Test_Internal]") { + SECTION("order = false") { + const std::vector> test_pairs{ + {4, 2}, {8, 3}, {12, 1}, {12, 2}, {12, 3}, {12, 4}, {12, 5}, + {12, 6}, {12, 7}, {12, 8}, {12, 9}, {12, 10}, {12, 11}, {12, 12}}; + + for (const auto &[n, r] : test_pairs) { + std::vector> vec; + auto v = CombinationGenerator(n, r).all_perms(); + + REQUIRE(v.size() == binomialCeff(n, r)); + for (const auto &perm : v) { + REQUIRE(perm.size() == r); + vec.emplace_back(perm.begin(), perm.end()); + } + + std::sort(v.begin(), v.end(), + [](const std::vector &v1, + const std::vector &v2) { + return std::lexicographical_compare( + v1.begin(), v1.end(), v2.begin(), v2.end()); + }); // sort lexicographically + for (size_t i = 0; i < v.size() - 1; i++) { + REQUIRE(v[i] != v[i + 1]); // all combinations must be different + } + } + } + SECTION("order = true") { + const std::vector> test_pairs{ + {4, 
2}, {8, 3}, {12, 1}, {12, 2}, {12, 3}, {12, 4}, {12, 5}}; + + for (const auto &[n, r] : test_pairs) { + auto v = PermutationGenerator(n, r).all_perms(); + + REQUIRE(v.size() == permSize(n, r)); + for (const auto &perm : v) { + REQUIRE(perm.size() == r); + } + + std::sort(v.begin(), v.end(), + [](const std::vector &v1, + const std::vector &v2) { + return std::lexicographical_compare( + v1.begin(), v1.end(), v2.begin(), v2.end()); + }); // sort lexicographically + for (size_t i = 0; i < v.size() - 1; i++) { + REQUIRE(v[i] != v[i + 1]); // all permutations must be different + } } - - REQUIRE(mat == PLApprox(identity).margin(1e-5)); } } diff --git a/pennylane_lightning/src/tests/Test_Util.cpp b/pennylane_lightning/src/tests/Test_Util.cpp index 18c0a7c4b9..7986ee9ef5 100644 --- a/pennylane_lightning/src/tests/Test_Util.cpp +++ b/pennylane_lightning/src/tests/Test_Util.cpp @@ -594,3 +594,35 @@ TEST_CASE("Utility array and tuples", "[Util]") { std::pair("Four", 4), }); } + +/** + * @brief Test randomUnitary is correct + */ +TEMPLATE_TEST_CASE("randomUnitary", "[Test_Internal]", float, double) { + using PrecisionT = TestType; + + std::mt19937 re{1337}; + + for (size_t num_qubits = 1; num_qubits <= 5; num_qubits++) { + const size_t dim = (1U << num_qubits); + const auto unitary = Util::randomUnitary(re, num_qubits); + + auto unitary_dagger = Util::Transpose(unitary, dim, dim); + std::transform( + unitary_dagger.begin(), unitary_dagger.end(), + unitary_dagger.begin(), + [](const std::complex &v) { return std::conj(v); }); + + std::vector> mat(dim * dim); + Util::matrixMatProd(unitary.data(), unitary_dagger.data(), mat.data(), + dim, dim, dim); + + std::vector> identity( + dim * dim, std::complex{}); + for (size_t i = 0; i < dim; i++) { + identity[i * dim + i] = std::complex{1.0, 0.0}; + } + + REQUIRE(mat == PLApprox(identity).margin(1e-5)); + } +} From fd2c3f6d0bdf9a17c47312c08ce689ac107a4f4a Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Mon, 14 Mar 2022 16:09:25 -0400 
Subject: [PATCH 63/94] Apply suggestions from code review Co-authored-by: Ali Asadi --- doc/conf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 37a6be4452..b94e1c5983 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -72,7 +72,7 @@ def __getattr__(cls, name): # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. -needs_sphinx = "1.6" +needs_sphinx = "3.3" # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom @@ -229,7 +229,7 @@ def __getattr__(cls, name): "download_button": "#19b37b", } -edit_on_github_project = "XanaduAI/pennylane-lightning" +edit_on_github_project = "PennyLaneAI/pennylane-lightning" edit_on_github_branch = "master/doc" # ============================================================ From de62ece3e8b1030f19558265e6fef4c497d2603b Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Mon, 14 Mar 2022 16:10:34 -0400 Subject: [PATCH 64/94] Update pennylane_lightning/src/bindings/Bindings.cpp Co-authored-by: Ali Asadi --- pennylane_lightning/src/bindings/Bindings.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pennylane_lightning/src/bindings/Bindings.cpp b/pennylane_lightning/src/bindings/Bindings.cpp index 73b98f8c55..8cdabbf2ed 100644 --- a/pennylane_lightning/src/bindings/Bindings.cpp +++ b/pennylane_lightning/src/bindings/Bindings.cpp @@ -367,7 +367,7 @@ PYBIND11_MODULE(lightning_qubit_ops, // NOLINT: No control over Pybind internals /* Add compile info */ m.def("compile_info", &getCompileInfo, "Compiled binary information."); - /* Add compile info */ + /* Add runtime info */ m.def("runtime_info", &getRuntimeInfo, "Runtime information."); /* Add EXPORTED_KERNELS */ From 6507b59c4f181515cf9a4e038d0ee0d0adb4c031 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Mon, 14 Mar 2022 17:15:47 -0400 Subject: 
[PATCH 65/94] Fix PLApprox --- pennylane_lightning/src/tests/TestHelpers.hpp | 15 +++- ...est_GateImplementations_CompareKernels.cpp | 4 +- .../Test_GateImplementations_Generator.cpp | 2 +- .../Test_GateImplementations_Inverse.cpp | 2 +- .../tests/Test_GateImplementations_Matrix.cpp | 32 ++++---- .../Test_GateImplementations_Nonparam.cpp | 8 +- .../tests/Test_GateImplementations_Param.cpp | 78 +++++++++---------- .../src/tests/Test_Internal.cpp | 10 +-- pennylane_lightning/src/tests/Test_Util.cpp | 24 +++--- 9 files changed, 93 insertions(+), 82 deletions(-) diff --git a/pennylane_lightning/src/tests/TestHelpers.hpp b/pennylane_lightning/src/tests/TestHelpers.hpp index 6831a97b76..3b2fca4eba 100644 --- a/pennylane_lightning/src/tests/TestHelpers.hpp +++ b/pennylane_lightning/src/tests/TestHelpers.hpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -27,7 +28,7 @@ template struct is_complex> : std::true_type {}; template constexpr bool is_complex_v = is_complex::value; -template struct PLApprox { +template > struct PLApprox { const std::vector &comp_; explicit PLApprox(const std::vector &comp) : comp_{comp} {} @@ -78,6 +79,16 @@ template struct PLApprox { return *this; } }; + +/** + * @brief Simple helper for PLApprox for the cases when the class template + * deduction does not work well. 
+ */ +template +PLApprox approx(const std::vector &vec) { + return PLApprox(vec); +} + template std::ostream &operator<<(std::ostream &os, const PLApprox &approx) { os << approx.describe(); @@ -110,7 +121,7 @@ isApproxEqual(const std::vector &data1, const typename Data_t::value_type eps = std::numeric_limits::epsilon() * 100) { - return data1 == PLApprox(data2).epsilon(eps); + return data1 == PLApprox(data2).epsilon(eps); } /** diff --git a/pennylane_lightning/src/tests/Test_GateImplementations_CompareKernels.cpp b/pennylane_lightning/src/tests/Test_GateImplementations_CompareKernels.cpp index c66f07e522..98bd3c0870 100644 --- a/pennylane_lightning/src/tests/Test_GateImplementations_CompareKernels.cpp +++ b/pennylane_lightning/src/tests/Test_GateImplementations_CompareKernels.cpp @@ -131,7 +131,7 @@ void testApplyGate(RandomEngine &re, size_t num_qubits) { for (size_t i = 0; i < results.size() - 1; i++) { REQUIRE(results[i] == - PLApprox(results[i + 1]) + approx(results[i + 1]) .margin(static_cast(1e-5))); } } @@ -147,7 +147,7 @@ void testApplyGate(RandomEngine &re, size_t num_qubits) { for (size_t i = 0; i < results.size() - 1; i++) { REQUIRE(results[i] == - PLApprox(results[i + 1]) + approx(results[i + 1]) .margin(static_cast(1e-5))); } } diff --git a/pennylane_lightning/src/tests/Test_GateImplementations_Generator.cpp b/pennylane_lightning/src/tests/Test_GateImplementations_Generator.cpp index fd045742e9..d2ecd00a4a 100644 --- a/pennylane_lightning/src/tests/Test_GateImplementations_Generator.cpp +++ b/pennylane_lightning/src/tests/Test_GateImplementations_Generator.cpp @@ -114,7 +114,7 @@ void testGeneratorForGate(RandomEngine &re, size_t num_qubits) { scaleVector(gate_der_st, static_cast(0.5) / eps); - REQUIRE(gntr_st == PLApprox(gate_der_st).margin(1e-3)); + REQUIRE(gntr_st == approx(gate_der_st).margin(1e-3)); } } template (re); diff --git a/pennylane_lightning/src/tests/Test_GateImplementations_Matrix.cpp 
b/pennylane_lightning/src/tests/Test_GateImplementations_Matrix.cpp index a49440aa20..90ede20986 100644 --- a/pennylane_lightning/src/tests/Test_GateImplementations_Matrix.cpp +++ b/pennylane_lightning/src/tests/Test_GateImplementations_Matrix.cpp @@ -83,7 +83,7 @@ void testApplyMatrix() { auto st = ini_st; GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), wires, false); - REQUIRE(st == PLApprox(expected).margin(1e-5)); + REQUIRE(st == approx(expected).margin(1e-5)); } DYNAMIC_SECTION(GateImplementation::name @@ -139,7 +139,7 @@ void testApplyMatrix() { auto st = ini_st; GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), wires, false); - REQUIRE(st == PLApprox(expected).margin(1e-5)); + REQUIRE(st == approx(expected).margin(1e-5)); } DYNAMIC_SECTION(GateImplementation::name @@ -195,7 +195,7 @@ void testApplyMatrix() { auto st = ini_st; GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), wires, false); - REQUIRE(st == PLApprox(expected).margin(1e-5)); + REQUIRE(st == approx(expected).margin(1e-5)); } DYNAMIC_SECTION(GateImplementation::name @@ -263,7 +263,7 @@ void testApplyMatrix() { auto st = ini_st; GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), wires, false); - REQUIRE(st == PLApprox(expected).margin(1e-5)); + REQUIRE(st == approx(expected).margin(1e-5)); } DYNAMIC_SECTION(GateImplementation::name @@ -331,7 +331,7 @@ void testApplyMatrix() { auto st = ini_st; GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), wires, false); - REQUIRE(st == PLApprox(expected).margin(1e-5)); + REQUIRE(st == approx(expected).margin(1e-5)); } DYNAMIC_SECTION(GateImplementation::name @@ -448,7 +448,7 @@ void testApplyMatrix() { auto st = ini_st; GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), wires, false); - REQUIRE(st == PLApprox(expected).margin(1e-5)); + REQUIRE(st == approx(expected).margin(1e-5)); } DYNAMIC_SECTION(GateImplementation::name @@ -757,7 +757,7 @@ 
void testApplyMatrix() { auto st = ini_st; GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), wires, false); - REQUIRE(st == PLApprox(expected).margin(1e-5)); + REQUIRE(st == approx(expected).margin(1e-5)); } } @@ -803,7 +803,7 @@ void testApplyMatrixInverse() { wires, false); GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), wires, true); - REQUIRE(st == PLApprox(ini_st).margin(1e-5)); + REQUIRE(st == approx(ini_st).margin(1e-5)); } DYNAMIC_SECTION(GateImplementation::name @@ -821,7 +821,7 @@ void testApplyMatrixInverse() { GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), wires, true); - REQUIRE(st == PLApprox(ini_st).margin(1e-5)); + REQUIRE(st == approx(ini_st).margin(1e-5)); } DYNAMIC_SECTION(GateImplementation::name @@ -839,7 +839,7 @@ void testApplyMatrixInverse() { GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), wires, true); - REQUIRE(st == PLApprox(ini_st).margin(1e-5)); + REQUIRE(st == approx(ini_st).margin(1e-5)); } DYNAMIC_SECTION(GateImplementation::name @@ -857,7 +857,7 @@ void testApplyMatrixInverse() { GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), wires, true); - REQUIRE(st == PLApprox(ini_st).margin(1e-5)); + REQUIRE(st == approx(ini_st).margin(1e-5)); } DYNAMIC_SECTION(GateImplementation::name @@ -875,7 +875,7 @@ void testApplyMatrixInverse() { GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), wires, true); - REQUIRE(st == PLApprox(ini_st).margin(1e-5)); + REQUIRE(st == approx(ini_st).margin(1e-5)); } DYNAMIC_SECTION(GateImplementation::name << ", wires = {1,2} - " @@ -891,7 +891,7 @@ void testApplyMatrixInverse() { GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), wires, true); - REQUIRE(st == PLApprox(ini_st).margin(1e-5)); + REQUIRE(st == approx(ini_st).margin(1e-5)); } DYNAMIC_SECTION(GateImplementation::name << ", wires = {1,3} - " @@ -907,7 +907,7 @@ void testApplyMatrixInverse() { 
GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), wires, true); - REQUIRE(st == PLApprox(ini_st).margin(1e-5)); + REQUIRE(st == approx(ini_st).margin(1e-5)); } DYNAMIC_SECTION(GateImplementation::name @@ -924,7 +924,7 @@ void testApplyMatrixInverse() { GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), wires, true); - REQUIRE(st == PLApprox(ini_st).margin(1e-5)); + REQUIRE(st == approx(ini_st).margin(1e-5)); } DYNAMIC_SECTION(GateImplementation::name << ", wires = {0,1,2,3} - " @@ -940,7 +940,7 @@ void testApplyMatrixInverse() { GateImplementation::applyMatrix(st.data(), num_qubits, matrix.data(), wires, true); - REQUIRE(st == PLApprox(ini_st).margin(1e-5)); + REQUIRE(st == approx(ini_st).margin(1e-5)); } } diff --git a/pennylane_lightning/src/tests/Test_GateImplementations_Nonparam.cpp b/pennylane_lightning/src/tests/Test_GateImplementations_Nonparam.cpp index 49eb353529..c5b3e01227 100644 --- a/pennylane_lightning/src/tests/Test_GateImplementations_Nonparam.cpp +++ b/pennylane_lightning/src/tests/Test_GateImplementations_Nonparam.cpp @@ -99,7 +99,7 @@ void testApplyPauliY() { GateImplementation::applyPauliY(st.data(), num_qubits, {index}, false); - CHECK(st == PLApprox(expected_results[index])); + CHECK(st == approx(expected_results[index])); } } PENNYLANE_RUN_TEST(PauliY); @@ -122,7 +122,7 @@ void testApplyPauliZ() { auto st = createPlusState(num_qubits); GateImplementation::applyPauliZ(st.data(), num_qubits, {index}, false); - CHECK(st == PLApprox(expected_results[index])); + CHECK(st == approx(expected_results[index])); } } PENNYLANE_RUN_TEST(PauliZ); @@ -168,7 +168,7 @@ template void testApplyS() { GateImplementation::applyS(st.data(), num_qubits, {index}, false); - CHECK(st == PLApprox(expected_results[index])); + CHECK(st == approx(expected_results[index])); } } PENNYLANE_RUN_TEST(S); @@ -191,7 +191,7 @@ template void testApplyT() { GateImplementation::applyT(st.data(), num_qubits, {index}, false); - CHECK(st == 
PLApprox(expected_results[index])); + CHECK(st == approx(expected_results[index])); } } PENNYLANE_RUN_TEST(T); diff --git a/pennylane_lightning/src/tests/Test_GateImplementations_Param.cpp b/pennylane_lightning/src/tests/Test_GateImplementations_Param.cpp index 894038c514..33ec8656a7 100644 --- a/pennylane_lightning/src/tests/Test_GateImplementations_Param.cpp +++ b/pennylane_lightning/src/tests/Test_GateImplementations_Param.cpp @@ -107,7 +107,7 @@ void testApplyPhaseShift() { GateImplementation::applyPhaseShift(st.data(), num_qubits, {index}, false, {angles[index]}); - CHECK(st == PLApprox(expected_results[index])); + CHECK(st == approx(expected_results[index])); } } PENNYLANE_RUN_TEST(PhaseShift); @@ -132,7 +132,7 @@ void testApplyRX() { GateImplementation::applyRX(st.data(), num_qubits, {0}, false, {angles[index]}); - CHECK(st == PLApprox(expected_results[index]).epsilon(1e-7)); + CHECK(st == approx(expected_results[index]).epsilon(1e-7)); } } PENNYLANE_RUN_TEST(RX); @@ -172,7 +172,7 @@ void testApplyRY() { auto st = init_state; GateImplementation::applyRY(st.data(), num_qubits, {0}, false, {angles[index]}); - CHECK(st == PLApprox(expected_results[index]).epsilon(1e-5)); + CHECK(st == approx(expected_results[index]).epsilon(1e-5)); } } } @@ -220,7 +220,7 @@ void testApplyRZ() { GateImplementation::applyRZ(st.data(), num_qubits, {index}, false, {angles[index]}); - CHECK(st == PLApprox(expected_results[index])); + CHECK(st == approx(expected_results[index])); } } PENNYLANE_RUN_TEST(RZ); @@ -254,7 +254,7 @@ void testApplyRot() { angles[index][0], angles[index][1], angles[index][2]); - CHECK(st == PLApprox(expected_results[index])); + CHECK(st == approx(expected_results[index])); } } PENNYLANE_RUN_TEST(Rot); @@ -289,7 +289,7 @@ void testApplyIsingXX() { auto st = ini_st; GateImplementation::applyIsingXX(st.data(), num_qubits, {0, 1}, false, angle); - REQUIRE(st == PLApprox(expected_results).margin(1e-7)); + REQUIRE(st == approx(expected_results).margin(1e-7)); } 
DYNAMIC_SECTION(GateImplementation::name << ", IsingXX0,1 |100> -> a|100> + b|010> - " @@ -312,7 +312,7 @@ void testApplyIsingXX() { auto st = ini_st; GateImplementation::applyIsingXX(st.data(), num_qubits, {0, 1}, false, angle); - REQUIRE(st == PLApprox(expected_results).margin(1e-7)); + REQUIRE(st == approx(expected_results).margin(1e-7)); } DYNAMIC_SECTION(GateImplementation::name << ", IsingXX0,1 |010> -> a|010> + b|100> - " @@ -335,7 +335,7 @@ void testApplyIsingXX() { auto st = ini_st; GateImplementation::applyIsingXX(st.data(), num_qubits, {0, 1}, false, angle); - REQUIRE(st == PLApprox(expected_results).margin(1e-7)); + REQUIRE(st == approx(expected_results).margin(1e-7)); } DYNAMIC_SECTION(GateImplementation::name << ", IsingXX0,1 |110> -> a|110> + b|000> - " @@ -358,7 +358,7 @@ void testApplyIsingXX() { auto st = ini_st; GateImplementation::applyIsingXX(st.data(), num_qubits, {0, 1}, false, angle); - REQUIRE(st == PLApprox(expected_results).margin(1e-7)); + REQUIRE(st == approx(expected_results).margin(1e-7)); } DYNAMIC_SECTION(GateImplementation::name << ", IsingXX0,2 - " @@ -390,7 +390,7 @@ void testApplyIsingXX() { auto st = ini_st; GateImplementation::applyIsingXX(st.data(), num_qubits, wires, false, angle); - REQUIRE(st == PLApprox(expected).margin(1e-5)); + REQUIRE(st == approx(expected).margin(1e-5)); } } PENNYLANE_RUN_TEST(IsingXX); @@ -422,7 +422,7 @@ void testApplyIsingYY() { auto st = ini_st; GateImplementation::applyIsingYY(st.data(), num_qubits, {0, 1}, false, angle); - REQUIRE(st == PLApprox(expected_results).margin(1e-7)); + REQUIRE(st == approx(expected_results).margin(1e-7)); } DYNAMIC_SECTION(GateImplementation::name << ", IsingYY0,1 |100> -> a|100> + b|010> - " @@ -445,7 +445,7 @@ void testApplyIsingYY() { auto st = ini_st; GateImplementation::applyIsingYY(st.data(), num_qubits, {0, 1}, false, angle); - REQUIRE(st == PLApprox(expected_results).margin(1e-7)); + REQUIRE(st == approx(expected_results).margin(1e-7)); } 
DYNAMIC_SECTION(GateImplementation::name << ", IsingYY0,1 |010> -> a|010> + b|100> - " @@ -468,7 +468,7 @@ void testApplyIsingYY() { auto st = ini_st; GateImplementation::applyIsingYY(st.data(), num_qubits, {0, 1}, false, angle); - REQUIRE(st == PLApprox(expected_results).margin(1e-7)); + REQUIRE(st == approx(expected_results).margin(1e-7)); } DYNAMIC_SECTION(GateImplementation::name << ", IsingYY0,1 |110> -> a|110> + b|000> - " @@ -491,7 +491,7 @@ void testApplyIsingYY() { auto st = ini_st; GateImplementation::applyIsingYY(st.data(), num_qubits, {0, 1}, false, angle); - REQUIRE(st == PLApprox(expected_results).margin(1e-7)); + REQUIRE(st == approx(expected_results).margin(1e-7)); } DYNAMIC_SECTION(GateImplementation::name << ", IsingYY0,1 - " @@ -542,7 +542,7 @@ void testApplyIsingYY() { auto st = ini_st; GateImplementation::applyIsingYY(st.data(), num_qubits, wires, false, angle); - REQUIRE(st == PLApprox(expected).margin(1e-5)); + REQUIRE(st == approx(expected).margin(1e-5)); } } PENNYLANE_RUN_TEST(IsingYY); @@ -574,7 +574,7 @@ void testApplyIsingZZ() { auto st = ini_st; GateImplementation::applyIsingZZ(st.data(), num_qubits, {0, 1}, false, angle); - REQUIRE(st == PLApprox(expected_results).margin(1e-7)); + REQUIRE(st == approx(expected_results).margin(1e-7)); } DYNAMIC_SECTION(GateImplementation::name << ", IsingZZ0,1 |100> -> |100> - " @@ -597,7 +597,7 @@ void testApplyIsingZZ() { auto st = ini_st; GateImplementation::applyIsingZZ(st.data(), num_qubits, {0, 1}, false, angle); - REQUIRE(st == PLApprox(expected_results).margin(1e-7)); + REQUIRE(st == approx(expected_results).margin(1e-7)); } DYNAMIC_SECTION(GateImplementation::name @@ -621,7 +621,7 @@ void testApplyIsingZZ() { auto st = ini_st; GateImplementation::applyIsingZZ(st.data(), num_qubits, {0, 1}, false, angle); - REQUIRE(st == PLApprox(expected_results).margin(1e-7)); + REQUIRE(st == approx(expected_results).margin(1e-7)); } DYNAMIC_SECTION(GateImplementation::name @@ -645,7 +645,7 @@ void 
testApplyIsingZZ() { auto st = ini_st; GateImplementation::applyIsingZZ(st.data(), num_qubits, {0, 1}, false, angle); - REQUIRE(st == PLApprox(expected_results).margin(1e-7)); + REQUIRE(st == approx(expected_results).margin(1e-7)); } DYNAMIC_SECTION(GateImplementation::name << ", IsingZZ0,1 - " @@ -696,7 +696,7 @@ void testApplyIsingZZ() { auto st = ini_st; GateImplementation::applyIsingZZ(st.data(), num_qubits, wires, false, angle); - REQUIRE(st == PLApprox(expected).margin(1e-5)); + REQUIRE(st == approx(expected).margin(1e-5)); } } PENNYLANE_RUN_TEST(IsingZZ); @@ -734,7 +734,7 @@ void testApplyControlledPhaseShift() { GateImplementation::applyControlledPhaseShift(st.data(), num_qubits, {0, 1}, false, angles[0]); CAPTURE(st); - CHECK(st == PLApprox(expected_results[0])); + CHECK(st == approx(expected_results[0])); } PENNYLANE_RUN_TEST(ControlledPhaseShift); @@ -789,7 +789,7 @@ void testApplyCRX() { auto st = ini_st; GateImplementation::applyCRX(st.data(), num_qubits, wires, false, angle); - REQUIRE(st == PLApprox(expected).margin(1e-5)); + REQUIRE(st == approx(expected).margin(1e-5)); } DYNAMIC_SECTION(GateImplementation::name << ", CRX0,2 - " << PrecisionToName::value) { @@ -839,7 +839,7 @@ void testApplyCRX() { auto st = ini_st; GateImplementation::applyCRX(st.data(), num_qubits, wires, false, angle); - REQUIRE(st == PLApprox(expected).margin(1e-5)); + REQUIRE(st == approx(expected).margin(1e-5)); } DYNAMIC_SECTION(GateImplementation::name << ", CRX1,3 - " << PrecisionToName::value) { @@ -889,7 +889,7 @@ void testApplyCRX() { auto st = ini_st; GateImplementation::applyCRX(st.data(), num_qubits, wires, false, angle); - REQUIRE(st == PLApprox(expected).margin(1e-5)); + REQUIRE(st == approx(expected).margin(1e-5)); } } PENNYLANE_RUN_TEST(CRX); @@ -946,7 +946,7 @@ void testApplyCRY() { auto st = ini_st; GateImplementation::applyCRY(st.data(), num_qubits, wires, false, angle); - REQUIRE(st == PLApprox(expected).margin(1e-5)); + REQUIRE(st == 
approx(expected).margin(1e-5)); } DYNAMIC_SECTION(GateImplementation::name @@ -997,7 +997,7 @@ void testApplyCRY() { auto st = ini_st; GateImplementation::applyCRY(st.data(), num_qubits, wires, false, angle); - REQUIRE(st == PLApprox(expected).margin(1e-5)); + REQUIRE(st == approx(expected).margin(1e-5)); } DYNAMIC_SECTION(GateImplementation::name @@ -1048,7 +1048,7 @@ void testApplyCRY() { auto st = ini_st; GateImplementation::applyCRY(st.data(), num_qubits, wires, false, angle); - REQUIRE(st == PLApprox(expected).margin(1e-5)); + REQUIRE(st == approx(expected).margin(1e-5)); } } @@ -1106,7 +1106,7 @@ void testApplyCRZ() { auto st = ini_st; GateImplementation::applyCRZ(st.data(), num_qubits, wires, false, angle); - REQUIRE(st == PLApprox(expected).margin(1e-5)); + REQUIRE(st == approx(expected).margin(1e-5)); } DYNAMIC_SECTION(GateImplementation::name @@ -1157,7 +1157,7 @@ void testApplyCRZ() { auto st = ini_st; GateImplementation::applyCRZ(st.data(), num_qubits, wires, false, angle); - REQUIRE(st == PLApprox(expected).margin(1e-5)); + REQUIRE(st == approx(expected).margin(1e-5)); } DYNAMIC_SECTION(GateImplementation::name @@ -1208,7 +1208,7 @@ void testApplyCRZ() { auto st = ini_st; GateImplementation::applyCRZ(st.data(), num_qubits, wires, false, angle); - REQUIRE(st == PLApprox(expected).margin(1e-5)); + REQUIRE(st == approx(expected).margin(1e-5)); } } PENNYLANE_RUN_TEST(CRZ); @@ -1235,7 +1235,7 @@ void testApplyCRot() { GateImplementation::applyCRot(st.data(), num_qubits, {0, 1}, false, angles[0], angles[1], angles[2]); - CHECK(st == PLApprox(ini_st)); + CHECK(st == approx(ini_st)); } DYNAMIC_SECTION(GateImplementation::name << ", CRot0,1 |100> -> |1>(a|0>+b|1>)|0> - " @@ -1246,7 +1246,7 @@ void testApplyCRot() { GateImplementation::applyCRot(st.data(), num_qubits, {0, 1}, false, angles[0], angles[1], angles[2]); - CHECK(st == PLApprox(expected_results)); + CHECK(st == approx(expected_results)); } DYNAMIC_SECTION(GateImplementation::name @@ -1299,7 +1299,7 @@ 
void testApplyCRot() { auto st = ini_st; GateImplementation::applyCRot(st.data(), num_qubits, wires, false, phi, theta, omega); - REQUIRE(st == PLApprox(expected).margin(1e-5)); + REQUIRE(st == approx(expected).margin(1e-5)); } } PENNYLANE_RUN_TEST(CRot); @@ -1332,7 +1332,7 @@ void testApplyMultiRZ() { GateImplementation::applyMultiRZ(st.data(), num_qubits, {0}, false, angle); - REQUIRE(st == PLApprox(expected).margin(1e-7)); + REQUIRE(st == approx(expected).margin(1e-7)); } DYNAMIC_SECTION(GateImplementation::name << ", MultiRZ0 |++++> - " @@ -1355,7 +1355,7 @@ void testApplyMultiRZ() { GateImplementation::applyMultiRZ(st.data(), num_qubits, {0}, false, angle); - REQUIRE(st == PLApprox(expected).margin(1e-7)); + REQUIRE(st == approx(expected).margin(1e-7)); } DYNAMIC_SECTION(GateImplementation::name << ", MultiRZ01 |++++> - " @@ -1378,7 +1378,7 @@ void testApplyMultiRZ() { GateImplementation::applyMultiRZ(st.data(), num_qubits, {0, 1}, false, angle); - REQUIRE(st == PLApprox(expected).margin(1e-7)); + REQUIRE(st == approx(expected).margin(1e-7)); } DYNAMIC_SECTION(GateImplementation::name << ", MultiRZ012 |++++> - " @@ -1401,7 +1401,7 @@ void testApplyMultiRZ() { GateImplementation::applyMultiRZ(st.data(), num_qubits, {0, 1, 2}, false, angle); - REQUIRE(st == PLApprox(expected).margin(1e-7)); + REQUIRE(st == approx(expected).margin(1e-7)); } DYNAMIC_SECTION(GateImplementation::name << ", MultiRZ0123 |++++> - " @@ -1424,7 +1424,7 @@ void testApplyMultiRZ() { GateImplementation::applyMultiRZ(st.data(), num_qubits, {0, 1, 2, 3}, false, angle); - REQUIRE(st == PLApprox(expected).margin(1e-7)); + REQUIRE(st == approx(expected).margin(1e-7)); } DYNAMIC_SECTION(GateImplementation::name @@ -1474,7 +1474,7 @@ void testApplyMultiRZ() { GateImplementation::applyMultiRZ(st.data(), num_qubits, wires, false, angle); - REQUIRE(st == PLApprox(expected).margin(1e-7)); + REQUIRE(st == approx(expected).margin(1e-7)); } } PENNYLANE_RUN_TEST(MultiRZ); diff --git 
a/pennylane_lightning/src/tests/Test_Internal.cpp b/pennylane_lightning/src/tests/Test_Internal.cpp index 33b5fef81a..d5fadfe14b 100644 --- a/pennylane_lightning/src/tests/Test_Internal.cpp +++ b/pennylane_lightning/src/tests/Test_Internal.cpp @@ -28,7 +28,7 @@ TEMPLATE_TEST_CASE("Approx", "[Test_Internal]", float, double) { ComplexPrecisionT{1.0001, 0.0}, ComplexPrecisionT{0.0, 0.9999}, }; - REQUIRE(test1 == PLApprox(test2).margin(0.00015)); + REQUIRE(test1 == approx(test2).margin(0.00015)); } SECTION("vector{1.0, 1.0*I} does not approx vector{1.0002, 0.9998*I} with " "margin 0.00015") { @@ -40,7 +40,7 @@ TEMPLATE_TEST_CASE("Approx", "[Test_Internal]", float, double) { ComplexPrecisionT{1.0002, 0.0}, ComplexPrecisionT{0.0, 0.9998}, }; - REQUIRE(test1 != PLApprox(test2).margin(0.00015)); + REQUIRE(test1 != approx(test2).margin(0.00015)); } SECTION("vector{1.0, 1.0*I} does not approx vector{1.0I, 1.0} with margin " "0.00015") { @@ -52,7 +52,7 @@ TEMPLATE_TEST_CASE("Approx", "[Test_Internal]", float, double) { ComplexPrecisionT{0.0, 1.0}, ComplexPrecisionT{1.0, 0.0}, }; - REQUIRE(test1 != PLApprox(test2).margin(0.00015)); + REQUIRE(test1 != approx(test2).margin(0.00015)); } } @@ -68,7 +68,7 @@ TEMPLATE_TEST_CASE("createProductState", "[Test_Internal]", float, double) { GateImplementationsPI::applyPauliX(expected.data(), 3, {1}, false); GateImplementationsPI::applyHadamard(expected.data(), 3, {1}, false); - REQUIRE(st == PLApprox(expected).margin(1e-7)); + REQUIRE(st == approx(expected).margin(1e-7)); } SECTION("createProductState(\"+-0\") == |+-1> ") { const auto st = createProductState("+-0"); @@ -81,7 +81,7 @@ TEMPLATE_TEST_CASE("createProductState", "[Test_Internal]", float, double) { GateImplementationsPI::applyPauliX(expected.data(), 3, {2}, false); - REQUIRE(st != PLApprox(expected).margin(1e-7)); + REQUIRE(st != approx(expected).margin(1e-7)); } } diff --git a/pennylane_lightning/src/tests/Test_Util.cpp b/pennylane_lightning/src/tests/Test_Util.cpp index 
7986ee9ef5..a286d7bba6 100644 --- a/pennylane_lightning/src/tests/Test_Util.cpp +++ b/pennylane_lightning/src/tests/Test_Util.cpp @@ -159,7 +159,7 @@ TEMPLATE_TEST_CASE("Utility math functions", "[Util][LinearAlgebra]", float, CAPTURE(v_out); CAPTURE(v_expected); - CHECK(v_out == PLApprox(v_expected).margin(1e-7)); + CHECK(v_out == approx(v_expected).margin(1e-7)); } } SECTION("Random Complex") { @@ -185,7 +185,7 @@ TEMPLATE_TEST_CASE("Utility math functions", "[Util][LinearAlgebra]", float, Util::matrixVecProd(mat, v_in, 4, 4); CAPTURE(v_out); - CHECK(v_out == PLApprox(v_expected).margin(1e-7)); + CHECK(v_out == approx(v_expected).margin(1e-7)); } SECTION("Invalid Arguments") { using namespace Catch::Matchers; @@ -215,7 +215,7 @@ TEMPLATE_TEST_CASE("Utility math functions", "[Util][LinearAlgebra]", float, CAPTURE(v_out); CAPTURE(v_expected); - CHECK(v_out == PLApprox(v_expected).margin(1e-7)); + CHECK(v_out == approx(v_expected).margin(1e-7)); } } SECTION("Zero Vector") { @@ -229,7 +229,7 @@ TEMPLATE_TEST_CASE("Utility math functions", "[Util][LinearAlgebra]", float, CAPTURE(v_out); CAPTURE(v_expected); - CHECK(v_out == PLApprox(v_expected).margin(1e-7)); + CHECK(v_out == approx(v_expected).margin(1e-7)); } } SECTION("Random Matrix") { @@ -242,7 +242,7 @@ TEMPLATE_TEST_CASE("Utility math functions", "[Util][LinearAlgebra]", float, CAPTURE(v_out); CAPTURE(v_expected); - CHECK(v_out == PLApprox(v_expected).margin(1e-7)); + CHECK(v_out == approx(v_expected).margin(1e-7)); } } SECTION("Transpose") { @@ -258,7 +258,7 @@ TEMPLATE_TEST_CASE("Utility math functions", "[Util][LinearAlgebra]", float, CAPTURE(mat_t); CAPTURE(mat); - CHECK(mat_t == PLApprox(mat).margin(1e-7)); + CHECK(mat_t == approx(mat).margin(1e-7)); } } SECTION("Random Complex") { @@ -286,7 +286,7 @@ TEMPLATE_TEST_CASE("Utility math functions", "[Util][LinearAlgebra]", float, CAPTURE(mat_t); CAPTURE(mat_t_exp); - CHECK(mat_t == PLApprox(mat_t_exp)); + CHECK(mat_t == approx(mat_t_exp)); } SECTION("Invalid 
Arguments") { using namespace Catch::Matchers; @@ -311,7 +311,7 @@ TEMPLATE_TEST_CASE("Utility math functions", "[Util][LinearAlgebra]", float, CAPTURE(m_out); CAPTURE(m_out_exp); - CHECK(m_out == PLApprox(m_out_exp)); + CHECK(m_out == approx(m_out_exp)); } } SECTION("Random Complex") { @@ -368,8 +368,8 @@ TEMPLATE_TEST_CASE("Utility math functions", "[Util][LinearAlgebra]", float, CAPTURE(m_out_2); CAPTURE(m_out_exp); - CHECK(m_out_1 == PLApprox(m_out_2)); - CHECK(m_out_1 == PLApprox(m_out_exp)); + CHECK(m_out_1 == approx(m_out_2)); + CHECK(m_out_1 == approx(m_out_exp)); } SECTION("Random complex non-square") { const size_t m = 4; @@ -442,7 +442,7 @@ TEMPLATE_TEST_CASE("Utility math functions", "[Util][LinearAlgebra]", float, const auto m_out = Util::matrixMatProd(mat1, mat2, m, n, k); - CHECK(m_out == PLApprox(expected)); + CHECK(m_out == approx(expected)); } SECTION("Invalid Arguments") { using namespace Catch::Matchers; @@ -623,6 +623,6 @@ TEMPLATE_TEST_CASE("randomUnitary", "[Test_Internal]", float, double) { identity[i * dim + i] = std::complex{1.0, 0.0}; } - REQUIRE(mat == PLApprox(identity).margin(1e-5)); + REQUIRE(mat == approx(identity).margin(1e-5)); } } From 1c450d2c4fd8f67f9dea5fac54205f3b57a0c83a Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Mon, 14 Mar 2022 17:16:13 -0400 Subject: [PATCH 66/94] Update pennylane_lightning/src/tests/CreateAllWires.cpp Co-authored-by: Ali Asadi --- pennylane_lightning/src/tests/CreateAllWires.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/pennylane_lightning/src/tests/CreateAllWires.cpp b/pennylane_lightning/src/tests/CreateAllWires.cpp index dd0194a625..65b3a10ffa 100644 --- a/pennylane_lightning/src/tests/CreateAllWires.cpp +++ b/pennylane_lightning/src/tests/CreateAllWires.cpp @@ -6,7 +6,6 @@ auto crateAllWires(size_t n_qubits, Gates::GateOperation gate_op, bool order) // make all possible 2^N permutations std::vector> res; res.reserve((1U << n_qubits) - 1); - ; for (size_t k = 1; k < (static_cast(1U) << 
n_qubits); k++) { std::vector wires; wires.reserve(Util::popcount(k)); From 131c626a77ca38c0076f70805a3480e17ca18649 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Mon, 14 Mar 2022 17:59:27 -0400 Subject: [PATCH 67/94] Fix createAllWires; Add NVCC/NVHPC compiler info --- .../src/tests/CreateAllWires.cpp | 2 +- .../src/tests/CreateAllWires.hpp | 2 +- ...est_GateImplementations_CompareKernels.cpp | 2 +- pennylane_lightning/src/util/Macros.hpp | 76 ++++++++++++++++++- pennylane_lightning/src/util/RuntimeInfo.cpp | 3 +- 5 files changed, 77 insertions(+), 8 deletions(-) diff --git a/pennylane_lightning/src/tests/CreateAllWires.cpp b/pennylane_lightning/src/tests/CreateAllWires.cpp index 65b3a10ffa..6bea13f39a 100644 --- a/pennylane_lightning/src/tests/CreateAllWires.cpp +++ b/pennylane_lightning/src/tests/CreateAllWires.cpp @@ -1,6 +1,6 @@ #include "CreateAllWires.hpp" namespace Pennylane { -auto crateAllWires(size_t n_qubits, Gates::GateOperation gate_op, bool order) +auto createAllWires(size_t n_qubits, Gates::GateOperation gate_op, bool order) -> std::vector> { if (Util::array_has_elt(Gates::Constant::multi_qubit_gates, gate_op)) { // make all possible 2^N permutations diff --git a/pennylane_lightning/src/tests/CreateAllWires.hpp b/pennylane_lightning/src/tests/CreateAllWires.hpp index 54d3cd9e9a..f462b4ae20 100644 --- a/pennylane_lightning/src/tests/CreateAllWires.hpp +++ b/pennylane_lightning/src/tests/CreateAllWires.hpp @@ -87,6 +87,6 @@ class PermutationGenerator : public WiresGenerator { * @param gate_op Gate operation * @param order Whether the ordering matters (if true, permutation is used) */ -auto crateAllWires(size_t n_qubits, Gates::GateOperation gate_op, bool order) +auto createAllWires(size_t n_qubits, Gates::GateOperation gate_op, bool order) -> std::vector>; } // namespace Pennylane diff --git a/pennylane_lightning/src/tests/Test_GateImplementations_CompareKernels.cpp b/pennylane_lightning/src/tests/Test_GateImplementations_CompareKernels.cpp 
index 98bd3c0870..0c87e07154 100644 --- a/pennylane_lightning/src/tests/Test_GateImplementations_CompareKernels.cpp +++ b/pennylane_lightning/src/tests/Test_GateImplementations_CompareKernels.cpp @@ -115,7 +115,7 @@ void testApplyGate(RandomEngine &re, size_t num_qubits) { INFO("PrecisionT, ParamT = " << PrecisionToName::value << ", " << PrecisionToName::value); - const auto all_wires = crateAllWires(num_qubits, gate_op, true); + const auto all_wires = createAllWires(num_qubits, gate_op, true); for (const auto &wires : all_wires) { const auto params = createParams(gate_op); const auto gate_name = lookup(gate_names, gate_op); diff --git a/pennylane_lightning/src/util/Macros.hpp b/pennylane_lightning/src/util/Macros.hpp index a8cb8c1d7d..43d44daaab 100644 --- a/pennylane_lightning/src/util/Macros.hpp +++ b/pennylane_lightning/src/util/Macros.hpp @@ -70,7 +70,7 @@ #endif #if (_OPENMP >= 202011) -#define PL_UNROLL_LOOP __Pragma("omp unroll(8)") +#define PL_UNROLL_LOOP _Pragma("omp unroll(8)") #elif defined(__GNUC__) #define PL_UNROLL_LOOP _Pragma("GCC unroll 8") #elif defined(__clang__) @@ -135,27 +135,97 @@ constexpr auto getCPUArchMSVC() { [[maybe_unused]] constexpr static auto cpu_arch = CPUArch::Unknown; #endif -enum class Compiler { GCC, Clang, MSVC, Unknown }; +enum class Compiler { GCC, Clang, MSVC, NVCC, NVHPC, Unknown }; +/** + * @brief When none of the specialized functions is called. + */ template constexpr auto getCompilerVersion() -> std::string_view { return "Unknown version"; } +/** + * @brief Create version string for GCC. + * + * This function raises an error when instantiated (invoked) if a compiler + * does not define macros (i.e. other than GCC compatible compilers). + */ template <> constexpr auto getCompilerVersion() -> std::string_view { return PL_TO_STR(__GNUC__) "." PL_TO_STR(__GNUC_MINOR__) "." PL_TO_STR( __GNUC_PATCHLEVEL__); } + +/** + * @brief Create version string for Clang. 
+ * + * This function raises an error when instantiated (invoked) if a compiler + * does not define macros (i.e. other than Clang). + */ template <> constexpr auto getCompilerVersion() -> std::string_view { return PL_TO_STR(__clang_major__) "." PL_TO_STR( __clang_minor__) "." PL_TO_STR(__clang_patchlevel__); } + +/** + * @brief Create version string for MSVC. + * + * This function raises an error when instantiated (invoked) if a compiler + * does not define macros (i.e. other than MSVC). + */ template <> constexpr auto getCompilerVersion() -> std::string_view { return PL_TO_STR(_MSC_FULL_VER); } -#if defined(__GNUC__) && !defined(__llvm__) && !defined(__INTEL_COMPILER) + +/** + * @brief Create version string for NVCC. + * + * This function raises an error when instantiated (invoked) if a compiler + * does not define macros (i.e. other than NVCC). + */ +template <> +constexpr auto getCompilerVersion() -> std::string_view { + return PL_TO_STR(__CUDACC_VER_MAJOR__) "." PL_TO_STR( + __CUDACC_VER_MINOR__) "." PL_TO_STR(__CUDACC_VER_BUILD__); +} + +/** + * @brief Create version string for NVCC. + * + * This function raises an error when instantiated (invoked) if a compiler + * does not define macros (i.e. other than NVCC). + * + * See + * https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#nvcc-identification-macro + * for related information + */ +template <> +constexpr auto getCompilerVersion() -> std::string_view { + return PL_TO_STR(__CUDACC_VER_MAJOR__) "." PL_TO_STR( + __CUDACC_VER_MINOR__) "." PL_TO_STR(__CUDACC_VER_BUILD__); +} + +/** + * @brief Create version string for NVHPC (C/C++ compilers without CUDA from + * NVIDIA). + * + * This function raises an error when instantiated (invoked) if a compiler + * does not define macros (i.e. other than NVHPC). + */ +template <> +constexpr auto getCompilerVersion() -> std::string_view { + return PL_TO_STR(__NVCOMPILER_MAJOR__) "." PL_TO_STR( + __NVCOMPILER_MINOR__) "." 
PL_TO_STR(__NVCOMPILER_PATCHLEVEL__); +} + +#if defined(__NVCC__) +[[maybe_unused]] constexpr static auto compiler = Compiler::NVCC; +#elif defined(__NVCOMPILER) +[[maybe_unused]] constexpr static auto compiler = Compiler::NVHPC; +#elif defined(__GNUC__) && !defined(__llvm__) && !defined(__INTEL_COMPILER) +// All GCC compatible compilers define __GNUC__. [[maybe_unused]] constexpr static auto compiler = Compiler::GCC; #elif defined(__clang__) [[maybe_unused]] constexpr static auto compiler = Compiler::Clang; diff --git a/pennylane_lightning/src/util/RuntimeInfo.cpp b/pennylane_lightning/src/util/RuntimeInfo.cpp index 5a208cb540..6d89794615 100644 --- a/pennylane_lightning/src/util/RuntimeInfo.cpp +++ b/pennylane_lightning/src/util/RuntimeInfo.cpp @@ -13,11 +13,10 @@ // limitations under the License. #include "RuntimeInfo.hpp" -#include - #if defined(__GNUC__) || defined(__clang__) #include #elif defined(_MSC_VER) +#include #include #endif From 02b5e332600f713e1772421217e1ce8c9138ada1 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Mon, 14 Mar 2022 18:06:41 -0400 Subject: [PATCH 68/94] Fix --- pennylane_lightning/src/util/Macros.hpp | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/pennylane_lightning/src/util/Macros.hpp b/pennylane_lightning/src/util/Macros.hpp index 43d44daaab..c4c20d07bb 100644 --- a/pennylane_lightning/src/util/Macros.hpp +++ b/pennylane_lightning/src/util/Macros.hpp @@ -179,18 +179,6 @@ constexpr auto getCompilerVersion() -> std::string_view { return PL_TO_STR(_MSC_FULL_VER); } -/** - * @brief Create version string for NVCC. - * - * This function raises an error when instantiated (invoked) if a compiler - * does not define macros (i.e. other than NVCC). - */ -template <> -constexpr auto getCompilerVersion() -> std::string_view { - return PL_TO_STR(__CUDACC_VER_MAJOR__) "." PL_TO_STR( - __CUDACC_VER_MINOR__) "." PL_TO_STR(__CUDACC_VER_BUILD__); -} - /** * @brief Create version string for NVCC. 
* From 351841cad464bfb8c119280ce9667f61c4b7be8f Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Mon, 14 Mar 2022 19:58:36 -0400 Subject: [PATCH 69/94] Add test for squaredNorm --- pennylane_lightning/src/tests/TestHelpers.hpp | 39 ++++------------- pennylane_lightning/src/tests/Test_Util.cpp | 22 ++++++++++ .../src/util/LinearAlgebra.hpp | 42 +++++++++++++++---- pennylane_lightning/src/util/Util.hpp | 12 ++++++ 4 files changed, 77 insertions(+), 38 deletions(-) diff --git a/pennylane_lightning/src/tests/TestHelpers.hpp b/pennylane_lightning/src/tests/TestHelpers.hpp index 3b2fca4eba..a5c87328f5 100644 --- a/pennylane_lightning/src/tests/TestHelpers.hpp +++ b/pennylane_lightning/src/tests/TestHelpers.hpp @@ -16,25 +16,14 @@ #include namespace Pennylane { -template struct remove_complex { using type = T; }; -template struct remove_complex> { - using type = T; -}; -template using remove_complex_t = typename remove_complex::type; - -template struct is_complex : std::false_type {}; - -template struct is_complex> : std::true_type {}; - -template constexpr bool is_complex_v = is_complex::value; - template > struct PLApprox { const std::vector &comp_; explicit PLApprox(const std::vector &comp) : comp_{comp} {} - remove_complex_t margin_{}; - remove_complex_t epsilon_ = std::numeric_limits::epsilon() * 100; + Util::remove_complex_t margin_{}; + Util::remove_complex_t epsilon_ = + std::numeric_limits::epsilon() * 100; template [[nodiscard]] bool compare(const std::vector &lhs) const { @@ -43,7 +32,7 @@ template > struct PLApprox { } for (size_t i = 0; i < lhs.size(); i++) { - if constexpr (is_complex_v) { + if constexpr (Util::is_complex_v) { if (lhs[i].real() != Approx(comp_[i].real()) .epsilon(epsilon_) .margin(margin_) || @@ -61,6 +50,7 @@ template > struct PLApprox { } return true; } + [[nodiscard]] std::string describe() const { std::ostringstream ss; ss << "is Approx to {"; @@ -70,11 +60,12 @@ template > struct PLApprox { ss << "}" << std::endl; return ss.str(); } 
- PLApprox &epsilon(remove_complex_t eps) { + + PLApprox &epsilon(Util::remove_complex_t eps) { epsilon_ = eps; return *this; } - PLApprox &margin(remove_complex_t m) { + PLApprox &margin(Util::remove_complex_t m) { margin_ = m; return *this; } @@ -198,18 +189,6 @@ auto createPlusState(size_t num_qubits) return res; } -/** - * @brief Calculate the squared norm of a vector - */ -template -auto squaredNorm(const std::complex *data, size_t data_size) - -> PrecisionT { - return std::transform_reduce( - data, data + data_size, PrecisionT{}, std::plus(), - static_cast &)>( - &std::norm)); -} - /** * @brief create a random state */ @@ -223,7 +202,7 @@ auto createRandomState(RandomEngine &re, size_t num_qubits) } scaleVector(res, std::complex{1.0, 0.0} / - std::sqrt(squaredNorm(res.data(), res.size()))); + std::sqrt(Util::squaredNorm(res.data(), res.size()))); return res; } diff --git a/pennylane_lightning/src/tests/Test_Util.cpp b/pennylane_lightning/src/tests/Test_Util.cpp index a286d7bba6..0b4d4f7e9d 100644 --- a/pennylane_lightning/src/tests/Test_Util.cpp +++ b/pennylane_lightning/src/tests/Test_Util.cpp @@ -568,6 +568,28 @@ TEST_CASE("Utility bit operations", "[Util][BitUtil]") { } } } + + SECTION("SquaredNorm") { + { // for float + std::vector vec{0.0, 1.0, 3.0, 10.0}; + CHECK(Util::squaredNorm(vec) == Approx(110.0)); + } + + { // for double + std::vector vec{0.0, 1.0, 3.0, 10.0}; + CHECK(Util::squaredNorm(vec) == Approx(110.0)); + } + + { // for complex + std::vector> vec{{0.0, 1.0}, {3.0, 10.0}}; + CHECK(Util::squaredNorm(vec) == Approx(110.0)); + } + + { // for complex + std::vector> vec{{0.0, 1.0}, {3.0, 10.0}}; + CHECK(Util::squaredNorm(vec) == Approx(110.0)); + } + } } TEST_CASE("Utility array and tuples", "[Util]") { diff --git a/pennylane_lightning/src/util/LinearAlgebra.hpp b/pennylane_lightning/src/util/LinearAlgebra.hpp index 1cf36e7be9..26a57a4e2f 100644 --- a/pennylane_lightning/src/util/LinearAlgebra.hpp +++ 
b/pennylane_lightning/src/util/LinearAlgebra.hpp @@ -750,15 +750,41 @@ inline auto matrixMatProd(const std::vector> m_left, } /** - * @brief Calculate the squared norm of a vector + * @brief @rst + * Compute the squared norm of a real/complex vector :math:`\sum_k |v_k|^2` + * @endrst + * + * @param data Data pointer + * @param data_size Size of the data + */ +template +auto squaredNorm(const T *data, size_t data_size) -> remove_complex_t { + if constexpr (is_complex_v) { + // complex type + using PrecisionT = remove_complex_t; + return std::transform_reduce( + data, data + data_size, PrecisionT{}, std::plus(), + static_cast &)>( + &std::norm)); + } else { + using PrecisionT = T; + return std::transform_reduce( + data, data + data_size, PrecisionT{}, std::plus(), + static_cast(std::norm)); + } +} + +/** + * @brief @rst + * Compute the squared norm of a real/complex vector :math:`\sum_k |v_k|^2` + * @endrst + * + * @param data Data pointer + * @param data_size Size of the data */ -template -auto squaredNorm(const std::complex *data, size_t data_size) - -> PrecisionT { - return std::transform_reduce( - data, data + data_size, PrecisionT{}, std::plus(), - static_cast &)>( - &std::norm)); +template +auto squaredNorm(const std::vector &vec) -> remove_complex_t { + return squaredNorm(vec.data(), vec.size()); } /** diff --git a/pennylane_lightning/src/util/Util.hpp b/pennylane_lightning/src/util/Util.hpp index adbe6d9f42..b101dc9e35 100644 --- a/pennylane_lightning/src/util/Util.hpp +++ b/pennylane_lightning/src/util/Util.hpp @@ -405,4 +405,16 @@ auto chunkData(const Container &data, std::size_t num_chunks) // type alias template using remove_cvref_t = typename remove_cvref::type; +template struct remove_complex { using type = T; }; +template struct remove_complex> { + using type = T; +}; +template using remove_complex_t = typename remove_complex::type; + +template struct is_complex : std::false_type {}; + +template struct is_complex> : std::true_type {}; + +template 
constexpr bool is_complex_v = is_complex::value; + } // namespace Pennylane::Util From 92f0ee65fffcd152722fabe13a5dd646953de2f5 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Mon, 14 Mar 2022 20:22:30 -0400 Subject: [PATCH 70/94] Add correct arg --- bin/utils.py | 10 ++++++---- pennylane_lightning/src/bindings/Bindings.hpp | 4 ++++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/bin/utils.py b/bin/utils.py index 6d9dab9420..6834078976 100644 --- a/bin/utils.py +++ b/bin/utils.py @@ -1,13 +1,13 @@ from pathlib import Path -import re -import fnmatch +from re import compile as re_compile +from fnmatch import fnmatch SRCFILE_EXT = ["c", "cc", "cpp", "cxx", "cu"] HEADERFILE_EXT = ["h", "hh", "hpp", "hxx", "cuh"] LIGHTNING_SOURCE_DIR = Path(__file__).resolve().parent.parent -rgx_gitignore_comment = re.compile("#.*$") +rgx_gitignore_comment = re_compile("#.*$") def get_cpp_files_from_path(path, ignore_patterns = None, use_gitignore = True, header_only = False): """return set of C++ source files from a path @@ -16,6 +16,7 @@ def get_cpp_files_from_path(path, ignore_patterns = None, use_gitignore = True, paths (pathlib.Path or str): a path to process ignore_patterns: patterns to ignore use_gitignore: find ignore patterns from .gitignore + header_only: find only header files when true """ path = Path(path) files_rel = set() # file paths relative to path @@ -44,7 +45,7 @@ def get_cpp_files_from_path(path, ignore_patterns = None, use_gitignore = True, files_to_remove = set() for ignore_pattern in ignore_patterns: for f in files_rel: - if fnmatch.fnmatch(str(f), ignore_pattern): + if fnmatch(str(f), ignore_pattern): files_to_remove.add(f) files_rel -= files_to_remove @@ -58,6 +59,7 @@ def get_cpp_files(paths, ignore_patterns = None, use_gitignore = True, header_on paths (list): list of all paths to process ignore_patterns: patterns to ignore use_gitignore: find ignore patterns from .gitignore + header_only: find only header files when true """ files = set() 
for path in paths: diff --git a/pennylane_lightning/src/bindings/Bindings.hpp b/pennylane_lightning/src/bindings/Bindings.hpp index d247144a8a..3f03d4a0d6 100644 --- a/pennylane_lightning/src/bindings/Bindings.hpp +++ b/pennylane_lightning/src/bindings/Bindings.hpp @@ -258,6 +258,10 @@ auto getCompileInfo() -> pybind11::dict { return "Clang"; case Compiler::MSVC: return "MSVC"; + case Compiler::NVCC: + return "NVCC"; + case Compiler::NVHPC: + return "NVHPC"; default: return "Unknown"; } From cbe36ace3c949551a90004bf2cd134721dd7556b Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Mon, 14 Mar 2022 20:56:12 -0400 Subject: [PATCH 71/94] Slightly refactor static_lookup --- .../src/gates/SelectKernel.hpp | 32 ------------------- .../src/simulator/DynamicDispatcher.cpp | 11 +++---- .../src/simulator/StateVectorBase.hpp | 10 +++--- .../src/tests/Test_DynamicDispatcher.cpp | 2 +- .../Test_GateImplementations_Generator.cpp | 6 ++-- .../Test_GateImplementations_Inverse.cpp | 2 +- .../src/tests/Test_OpToMemberFuncPtr.cpp | 2 +- .../src/util/LinearAlgebra.hpp | 3 +- pennylane_lightning/src/util/Util.hpp | 19 +++++++++++ 9 files changed, 35 insertions(+), 52 deletions(-) diff --git a/pennylane_lightning/src/gates/SelectKernel.hpp b/pennylane_lightning/src/gates/SelectKernel.hpp index 5057ed9b42..54056db379 100644 --- a/pennylane_lightning/src/gates/SelectKernel.hpp +++ b/pennylane_lightning/src/gates/SelectKernel.hpp @@ -28,38 +28,6 @@ #include namespace Pennylane::Gates { -/** - * @brief For lookup from any array of pair whose first elements are - * GateOperation. - * - * As Util::lookup can be used in constexpr context, this function is redundant - * (by the standard). But GCC 9 still does not accept Util::lookup in constexpr - * some cases. 
- */ -///@{ -template -constexpr auto -static_lookup(const std::array, size> &arr) -> T { - for (size_t idx = 0; idx < size; idx++) { - if (std::get<0>(arr[idx]) == op) { - return std::get<1>(arr[idx]); - } - } - return T{}; -} - -template -constexpr auto -static_lookup(const std::array, size> &arr) - -> T { - for (size_t idx = 0; idx < size; idx++) { - if (std::get<0>(arr[idx]) == op) { - return std::get<1>(arr[idx]); - } - } - return T{}; -} -///@} /// @cond DEV namespace Internal { diff --git a/pennylane_lightning/src/simulator/DynamicDispatcher.cpp b/pennylane_lightning/src/simulator/DynamicDispatcher.cpp index 315b7a102e..034612d573 100644 --- a/pennylane_lightning/src/simulator/DynamicDispatcher.cpp +++ b/pennylane_lightning/src/simulator/DynamicDispatcher.cpp @@ -24,7 +24,6 @@ #include "SelectKernel.hpp" using namespace Pennylane; -using namespace Pennylane::Util; /// @cond DEV namespace { @@ -50,7 +49,7 @@ constexpr auto gateOpToFunctor() { Gates::GateOpToMemberFuncPtr::value; assert(params.size() == - Gates::static_lookup(Gates::Constant::gate_num_params)); + Util::static_lookup(Gates::Constant::gate_num_params)); Gates::callGateOps(func_ptr, data, num_qubits, wires, inverse, params); }; } @@ -77,7 +76,7 @@ constexpr auto constructGateOpsFunctorTupleIter() { return constructGateOpsFunctorTupleIter< PrecisionT, ParamT, GateImplementation, gate_idx + 1>(); } else { - return prepend_to_tuple( + return Util::prepend_to_tuple( std::pair{gate_op, gateOpToFunctor()}, @@ -97,7 +96,7 @@ constexpr auto constructGeneratorOpsFunctorTupleIter() { } else if (gntr_idx < GateImplementation::implemented_generators.size()) { constexpr auto gntr_op = GateImplementation::implemented_generators[gntr_idx]; - return prepend_to_tuple( + return Util::prepend_to_tuple( std::pair{gntr_op, Gates::GeneratorOpToMemberFuncPtr< PrecisionT, GateImplementation, gntr_op>::value}, @@ -144,7 +143,7 @@ void registerAllImplementedGateOps() { const auto &gate_op_func_pair) { const auto 
&[gate_op, func] = gate_op_func_pair; std::string op_name = - std::string(lookup(Gates::Constant::gate_names, gate_op)); + std::string(Util::lookup(Gates::Constant::gate_names, gate_op)); dispatcher.registerGateOperation(op_name, GateImplementation::kernel_id, func); return gate_op; @@ -170,7 +169,7 @@ void registerAllImplementedGeneratorOps() { [&dispatcher](const auto &gntr_op_func_pair) { const auto &[gntr_op, func] = gntr_op_func_pair; std::string op_name = - std::string(lookup(Gates::Constant::generator_names, gntr_op)); + std::string(Util::lookup(Gates::Constant::generator_names, gntr_op)); dispatcher.registerGeneratorOperation( op_name, GateImplementation::kernel_id, func); return gntr_op; diff --git a/pennylane_lightning/src/simulator/StateVectorBase.hpp b/pennylane_lightning/src/simulator/StateVectorBase.hpp index dec223408c..82d528745d 100644 --- a/pennylane_lightning/src/simulator/StateVectorBase.hpp +++ b/pennylane_lightning/src/simulator/StateVectorBase.hpp @@ -48,7 +48,7 @@ inline void apply##GATE_NAME##_(const std::vector &wires, \ bool inverse, Ts &&...args) { \ auto *arr = getData(); \ - static_assert(Gates::static_lookup( \ + static_assert(Util::static_lookup( \ Gates::Constant::gate_num_params) == sizeof...(Ts), \ "The provided number of parameters for gate " #GATE_NAME \ " is wrong."); \ @@ -65,7 +65,7 @@ inline void apply##GATE_NAME(const std::vector &wires, \ bool inverse, Ts &&...args) { \ constexpr auto kernel = \ - Gates::static_lookup( \ + Util::static_lookup( \ Gates::Constant::default_kernel_for_gates); \ apply##GATE_NAME##_(wires, inverse, \ std::forward(args)...); \ @@ -299,9 +299,8 @@ template class StateVectorBase { namespace Constant = Gates::Constant; using Gates::GateOperation; using Gates::SelectKernel; - using Gates::static_lookup; - constexpr auto kernel = static_lookup( + constexpr auto kernel = Util::static_lookup( Constant::default_kernel_for_gates); static_assert( Util::array_has_elt(SelectKernel::implemented_gates, @@ 
-315,9 +314,8 @@ template class StateVectorBase { namespace Constant = Gates::Constant; using Gates::GateOperation; using Gates::SelectKernel; - using Gates::static_lookup; - constexpr auto kernel = static_lookup( + constexpr auto kernel = Util::static_lookup( Constant::default_kernel_for_gates); static_assert( Util::array_has_elt(SelectKernel::implemented_gates, diff --git a/pennylane_lightning/src/tests/Test_DynamicDispatcher.cpp b/pennylane_lightning/src/tests/Test_DynamicDispatcher.cpp index f4dcf3b4c2..e429b676a6 100644 --- a/pennylane_lightning/src/tests/Test_DynamicDispatcher.cpp +++ b/pennylane_lightning/src/tests/Test_DynamicDispatcher.cpp @@ -55,7 +55,7 @@ struct testDispatchForKernel { // and compare it to the dynamic dispatcher auto test_st = ini_st; const auto gate_name = - std::string(static_lookup(Constant::gate_names)); + std::string(Util::static_lookup(Constant::gate_names)); DynamicDispatcher::getInstance().applyOperation( GateImplementation::kernel_id, test_st.data(), num_qubits, gate_name, wires, false, params); diff --git a/pennylane_lightning/src/tests/Test_GateImplementations_Generator.cpp b/pennylane_lightning/src/tests/Test_GateImplementations_Generator.cpp index d2ecd00a4a..d2e957a745 100644 --- a/pennylane_lightning/src/tests/Test_GateImplementations_Generator.cpp +++ b/pennylane_lightning/src/tests/Test_GateImplementations_Generator.cpp @@ -38,7 +38,7 @@ constexpr std::string_view remove_prefix(const std::string_view &str, template constexpr auto findGateOpForGenerator() -> GateOperation { constexpr auto gntr_name = - remove_prefix(static_lookup(Constant::generator_names), 9); + remove_prefix(Util::static_lookup(Constant::generator_names), 9); for (const auto &[gate_op, gate_name] : Constant::gate_names) { if (gate_name == gntr_name) { @@ -76,8 +76,8 @@ void testGeneratorForGate(RandomEngine &re, size_t num_qubits) { constexpr ParamT eps = 1e-4; // For finite difference - constexpr auto gate_op = static_lookup(generator_gate_pairs); - 
constexpr auto gate_name = static_lookup(Constant::gate_names); + constexpr auto gate_op = Util::static_lookup(generator_gate_pairs); + constexpr auto gate_name = Util::static_lookup(Constant::gate_names); DYNAMIC_SECTION("Test generator of " << gate_name << " for kernel " << GateImplementation::name) { diff --git a/pennylane_lightning/src/tests/Test_GateImplementations_Inverse.cpp b/pennylane_lightning/src/tests/Test_GateImplementations_Inverse.cpp index 6e33d781a5..d05f3444c1 100644 --- a/pennylane_lightning/src/tests/Test_GateImplementations_Inverse.cpp +++ b/pennylane_lightning/src/tests/Test_GateImplementations_Inverse.cpp @@ -27,7 +27,7 @@ template void testInverseKernelGate(RandomEngine &re, size_t num_qubits) { if constexpr (gate_op != GateOperation::Matrix) { - constexpr auto gate_name = static_lookup(Constant::gate_names); + constexpr auto gate_name = Util::static_lookup(Constant::gate_names); DYNAMIC_SECTION("Test inverse of " << gate_name << " for kernel " << GateImplementation::name) { const auto ini_st = createRandomState(re, num_qubits); diff --git a/pennylane_lightning/src/tests/Test_OpToMemberFuncPtr.cpp b/pennylane_lightning/src/tests/Test_OpToMemberFuncPtr.cpp index 81f85038e4..77193c9c3b 100644 --- a/pennylane_lightning/src/tests/Test_OpToMemberFuncPtr.cpp +++ b/pennylane_lightning/src/tests/Test_OpToMemberFuncPtr.cpp @@ -210,7 +210,7 @@ constexpr auto gateOpFuncPtrPairsWithNumParamsIter() { decltype(gate_op_func_ptr_pairs)>) { constexpr auto elt = std::get(gate_op_func_ptr_pairs); - if constexpr (static_lookup(Constant::gate_num_params) == + if constexpr (Util::static_lookup(Constant::gate_num_params) == num_params) { return Util::prepend_to_tuple( elt, gateOpFuncPtrPairsWithNumParamsIter< diff --git a/pennylane_lightning/src/util/LinearAlgebra.hpp b/pennylane_lightning/src/util/LinearAlgebra.hpp index 26a57a4e2f..f27546d673 100644 --- a/pennylane_lightning/src/util/LinearAlgebra.hpp +++ b/pennylane_lightning/src/util/LinearAlgebra.hpp @@ 
-779,8 +779,7 @@ auto squaredNorm(const T *data, size_t data_size) -> remove_complex_t { * Compute the squared norm of a real/complex vector :math:`\sum_k |v_k|^2` * @endrst * - * @param data Data pointer - * @param data_size Size of the data + * @param vec std::vector containing data */ template auto squaredNorm(const std::vector &vec) -> remove_complex_t { diff --git a/pennylane_lightning/src/util/Util.hpp b/pennylane_lightning/src/util/Util.hpp index b101dc9e35..274bc27f26 100644 --- a/pennylane_lightning/src/util/Util.hpp +++ b/pennylane_lightning/src/util/Util.hpp @@ -402,6 +402,25 @@ auto chunkData(const Container &data, std::size_t num_chunks) return chunkDataSize(data, div); } +/** + * @brief For lookup from any array of pair whose first elements are + * GateOperation. + * + * As Util::lookup can be used in constexpr context, this function is redundant + * (by the standard). But GCC 9 still does not accept Util::lookup in constexpr + * some cases. + */ +template +constexpr auto +static_lookup(const std::array, size> &arr) -> T { + for (size_t idx = 0; idx < size; idx++) { + if (std::get<0>(arr[idx]) == op) { + return std::get<1>(arr[idx]); + } + } + return T{}; +} + // type alias template using remove_cvref_t = typename remove_cvref::type; From 26a508dc0e0972ba3974ac7ab834e13b50952ce6 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Mon, 14 Mar 2022 23:05:34 -0400 Subject: [PATCH 72/94] Rename AMD64 to x86_64 --- pennylane_lightning/src/bindings/Bindings.hpp | 4 ++-- pennylane_lightning/src/simulator/DynamicDispatcher.cpp | 4 ++-- pennylane_lightning/src/simulator/StateVectorBase.hpp | 4 ++-- .../src/tests/Test_GateImplementations_Generator.cpp | 7 ++++--- .../src/tests/Test_GateImplementations_Inverse.cpp | 3 ++- pennylane_lightning/src/tests/Test_OpToMemberFuncPtr.cpp | 4 ++-- pennylane_lightning/src/util/Macros.hpp | 6 +++--- pennylane_lightning/src/util/RuntimeInfo.hpp | 2 +- 8 files changed, 18 insertions(+), 16 deletions(-) diff --git 
a/pennylane_lightning/src/bindings/Bindings.hpp b/pennylane_lightning/src/bindings/Bindings.hpp index 3f03d4a0d6..142f11223b 100644 --- a/pennylane_lightning/src/bindings/Bindings.hpp +++ b/pennylane_lightning/src/bindings/Bindings.hpp @@ -239,8 +239,8 @@ auto getCompileInfo() -> pybind11::dict { const std::string_view cpu_arch_str = [] { switch (cpu_arch) { - case CPUArch::AMD64: - return "AMD64"; + case CPUArch::X86_64: + return "x86_64"; case CPUArch::PPC64: return "PPC64"; case CPUArch::ARM: diff --git a/pennylane_lightning/src/simulator/DynamicDispatcher.cpp b/pennylane_lightning/src/simulator/DynamicDispatcher.cpp index 034612d573..5caaf99e55 100644 --- a/pennylane_lightning/src/simulator/DynamicDispatcher.cpp +++ b/pennylane_lightning/src/simulator/DynamicDispatcher.cpp @@ -168,8 +168,8 @@ void registerAllImplementedGeneratorOps() { auto registerGeneratorToDispatcher = [&dispatcher](const auto &gntr_op_func_pair) { const auto &[gntr_op, func] = gntr_op_func_pair; - std::string op_name = - std::string(Util::lookup(Gates::Constant::generator_names, gntr_op)); + std::string op_name = std::string( + Util::lookup(Gates::Constant::generator_names, gntr_op)); dispatcher.registerGeneratorOperation( op_name, GateImplementation::kernel_id, func); return gntr_op; diff --git a/pennylane_lightning/src/simulator/StateVectorBase.hpp b/pennylane_lightning/src/simulator/StateVectorBase.hpp index 82d528745d..a157aacd10 100644 --- a/pennylane_lightning/src/simulator/StateVectorBase.hpp +++ b/pennylane_lightning/src/simulator/StateVectorBase.hpp @@ -48,7 +48,7 @@ inline void apply##GATE_NAME##_(const std::vector &wires, \ bool inverse, Ts &&...args) { \ auto *arr = getData(); \ - static_assert(Util::static_lookup( \ + static_assert(Util::static_lookup( \ Gates::Constant::gate_num_params) == sizeof...(Ts), \ "The provided number of parameters for gate " #GATE_NAME \ " is wrong."); \ @@ -65,7 +65,7 @@ inline void apply##GATE_NAME(const std::vector &wires, \ bool inverse, Ts 
&&...args) { \ constexpr auto kernel = \ - Util::static_lookup( \ + Util::static_lookup( \ Gates::Constant::default_kernel_for_gates); \ apply##GATE_NAME##_(wires, inverse, \ std::forward(args)...); \ diff --git a/pennylane_lightning/src/tests/Test_GateImplementations_Generator.cpp b/pennylane_lightning/src/tests/Test_GateImplementations_Generator.cpp index d2e957a745..1440fcda1a 100644 --- a/pennylane_lightning/src/tests/Test_GateImplementations_Generator.cpp +++ b/pennylane_lightning/src/tests/Test_GateImplementations_Generator.cpp @@ -37,8 +37,8 @@ constexpr std::string_view remove_prefix(const std::string_view &str, template constexpr auto findGateOpForGenerator() -> GateOperation { - constexpr auto gntr_name = - remove_prefix(Util::static_lookup(Constant::generator_names), 9); + constexpr auto gntr_name = remove_prefix( + Util::static_lookup(Constant::generator_names), 9); for (const auto &[gate_op, gate_name] : Constant::gate_names) { if (gate_name == gntr_name) { @@ -77,7 +77,8 @@ void testGeneratorForGate(RandomEngine &re, size_t num_qubits) { constexpr ParamT eps = 1e-4; // For finite difference constexpr auto gate_op = Util::static_lookup(generator_gate_pairs); - constexpr auto gate_name = Util::static_lookup(Constant::gate_names); + constexpr auto gate_name = + Util::static_lookup(Constant::gate_names); DYNAMIC_SECTION("Test generator of " << gate_name << " for kernel " << GateImplementation::name) { diff --git a/pennylane_lightning/src/tests/Test_GateImplementations_Inverse.cpp b/pennylane_lightning/src/tests/Test_GateImplementations_Inverse.cpp index d05f3444c1..fd73b7edfe 100644 --- a/pennylane_lightning/src/tests/Test_GateImplementations_Inverse.cpp +++ b/pennylane_lightning/src/tests/Test_GateImplementations_Inverse.cpp @@ -27,7 +27,8 @@ template void testInverseKernelGate(RandomEngine &re, size_t num_qubits) { if constexpr (gate_op != GateOperation::Matrix) { - constexpr auto gate_name = Util::static_lookup(Constant::gate_names); + constexpr auto 
gate_name = + Util::static_lookup(Constant::gate_names); DYNAMIC_SECTION("Test inverse of " << gate_name << " for kernel " << GateImplementation::name) { const auto ini_st = createRandomState(re, num_qubits); diff --git a/pennylane_lightning/src/tests/Test_OpToMemberFuncPtr.cpp b/pennylane_lightning/src/tests/Test_OpToMemberFuncPtr.cpp index 77193c9c3b..558e558f1a 100644 --- a/pennylane_lightning/src/tests/Test_OpToMemberFuncPtr.cpp +++ b/pennylane_lightning/src/tests/Test_OpToMemberFuncPtr.cpp @@ -210,8 +210,8 @@ constexpr auto gateOpFuncPtrPairsWithNumParamsIter() { decltype(gate_op_func_ptr_pairs)>) { constexpr auto elt = std::get(gate_op_func_ptr_pairs); - if constexpr (Util::static_lookup(Constant::gate_num_params) == - num_params) { + if constexpr (Util::static_lookup( + Constant::gate_num_params) == num_params) { return Util::prepend_to_tuple( elt, gateOpFuncPtrPairsWithNumParamsIter< PrecisionT, ParamT, num_params, tuple_idx + 1>()); diff --git a/pennylane_lightning/src/util/Macros.hpp b/pennylane_lightning/src/util/Macros.hpp index c4c20d07bb..09f2d7b32f 100644 --- a/pennylane_lightning/src/util/Macros.hpp +++ b/pennylane_lightning/src/util/Macros.hpp @@ -101,11 +101,11 @@ #endif namespace Pennylane::Util::Constant { -enum class CPUArch { AMD64, PPC64, ARM, Unknown }; +enum class CPUArch { X86_64, PPC64, ARM, Unknown }; constexpr auto getCPUArchClangGCC() { #if defined(__x86_64__) - return CPUArch::AMD64; + return CPUArch::X86_64; #elif defined(__powerpc64__) return CPUArch::PPC64; #elif defined(__arm__) @@ -117,7 +117,7 @@ constexpr auto getCPUArchClangGCC() { constexpr auto getCPUArchMSVC() { #if defined(_M_AMD64) - return CPUArch::AMD64; + return CPUArch::X86_64; #elif defined(_M_PPC) return CPUArch::PPC64; #elif defined(_M_ARM) diff --git a/pennylane_lightning/src/util/RuntimeInfo.hpp b/pennylane_lightning/src/util/RuntimeInfo.hpp index 416422bd45..2286009349 100644 --- a/pennylane_lightning/src/util/RuntimeInfo.hpp +++ 
b/pennylane_lightning/src/util/RuntimeInfo.hpp @@ -20,7 +20,7 @@ namespace Pennylane::Util { /** - * @brief This class is only usable in x86 or AMD64 architecture. + * @brief This class is only usable in x86 or x86_64 architecture. */ class RuntimeInfo { private: From 8cbef2675e5946516ca10e031128040b9e03e603 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Mon, 14 Mar 2022 23:52:20 -0400 Subject: [PATCH 73/94] Add docstring --- .../src/tests/CreateAllWires.hpp | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/pennylane_lightning/src/tests/CreateAllWires.hpp b/pennylane_lightning/src/tests/CreateAllWires.hpp index f462b4ae20..d923d5538c 100644 --- a/pennylane_lightning/src/tests/CreateAllWires.hpp +++ b/pennylane_lightning/src/tests/CreateAllWires.hpp @@ -14,6 +14,16 @@ class WiresGenerator { [[nodiscard]] virtual auto all_perms() const -> const std::vector> & = 0; }; + +/** + * @brief + * @rst Generating all permutation of wires without ordering (often called + * as combination). The size of all combination is given as :math:`n \choose r`. + * + * We use the recursion formula + * :math:`{n \choose r} = {n \choose r-1} + {n-1 \choose r}` + * @endrst + */ class CombinationGenerator : public WiresGenerator { private: std::vector v_; @@ -45,6 +55,16 @@ class CombinationGenerator : public WiresGenerator { return all_perms_; } }; + +/** + * @brief + * @rst Generating all permutation of wires with ordering. The size of all + * permutation is given as :math:`{}_{n}P_r=n!/(n-r)!r!`. 
+ * @endrst + * + * We use the recursion formula + * :math:`{}_n P_r = n {}_{n-1} P_{r-1}` + */ class PermutationGenerator : public WiresGenerator { private: std::vector> all_perms_; From 3ea7b4987ddbe6e98a5eec74600772baa32f79ca Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Tue, 15 Mar 2022 11:13:40 -0400 Subject: [PATCH 74/94] add docstring --- .../tests/Test_GateImplementations_CompareKernels.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pennylane_lightning/src/tests/Test_GateImplementations_CompareKernels.cpp b/pennylane_lightning/src/tests/Test_GateImplementations_CompareKernels.cpp index 0c87e07154..0412dfc5d8 100644 --- a/pennylane_lightning/src/tests/Test_GateImplementations_CompareKernels.cpp +++ b/pennylane_lightning/src/tests/Test_GateImplementations_CompareKernels.cpp @@ -31,6 +31,9 @@ using namespace Pennylane::Gates::Constant; using std::vector; +/** + * @brief Change the given type list of kernels to string + */ template std::string kernelsToString() { if constexpr (!std::is_same_v) { return std::string(TypeList::Type::name) + ", " + @@ -56,6 +59,10 @@ struct KernelsImplementingGateHelper { using Type = void; }; + +/** + * @brief Type list of kernels implementing the given gate operation. + */ template struct KernelsImplementingGate { using Type = typename KernelsImplementingGateHelper::Type; @@ -102,6 +109,10 @@ auto applyGateForImplemetingKernels( ini, num_qubits, wires, inverse, params)...); } +/** + * @brief Apply the given gate using all implementing kernels and compare + * the results. 
+ */ template void testApplyGate(RandomEngine &re, size_t num_qubits) { From cb3256e17b7c7a5b32b01d825bad3cff9c38bbcb Mon Sep 17 00:00:00 2001 From: Dev version update bot Date: Tue, 15 Mar 2022 18:22:44 +0000 Subject: [PATCH 75/94] Auto update version --- pennylane_lightning/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pennylane_lightning/_version.py b/pennylane_lightning/_version.py index ac926443a4..3bbb514373 100644 --- a/pennylane_lightning/_version.py +++ b/pennylane_lightning/_version.py @@ -16,4 +16,4 @@ Version number (major.minor.patch[-label]) """ -__version__ = "0.23.0-dev0" +__version__ = "0.23.0-dev1" From 4be5ac6aeb924c367417705ca9f512f0ef5b46fe Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Tue, 15 Mar 2022 14:23:56 -0400 Subject: [PATCH 76/94] Format --- pennylane_lightning/src/tests/CreateAllWires.hpp | 10 +++++----- .../tests/Test_GateImplementations_CompareKernels.cpp | 1 - 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/pennylane_lightning/src/tests/CreateAllWires.hpp b/pennylane_lightning/src/tests/CreateAllWires.hpp index d923d5538c..a05a7f8e9b 100644 --- a/pennylane_lightning/src/tests/CreateAllWires.hpp +++ b/pennylane_lightning/src/tests/CreateAllWires.hpp @@ -16,11 +16,11 @@ class WiresGenerator { }; /** - * @brief + * @brief * @rst Generating all permutation of wires without ordering (often called * as combination). The size of all combination is given as :math:`n \choose r`. * - * We use the recursion formula + * We use the recursion formula * :math:`{n \choose r} = {n \choose r-1} + {n-1 \choose r}` * @endrst */ @@ -57,12 +57,12 @@ class CombinationGenerator : public WiresGenerator { }; /** - * @brief - * @rst Generating all permutation of wires with ordering. The size of all + * @brief + * @rst Generating all permutation of wires with ordering. The size of all * permutation is given as :math:`{}_{n}P_r=n!/(n-r)!r!`. 
* @endrst * - * We use the recursion formula + * We use the recursion formula * :math:`{}_n P_r = n {}_{n-1} P_{r-1}` */ class PermutationGenerator : public WiresGenerator { diff --git a/pennylane_lightning/src/tests/Test_GateImplementations_CompareKernels.cpp b/pennylane_lightning/src/tests/Test_GateImplementations_CompareKernels.cpp index 0412dfc5d8..82a62b81b1 100644 --- a/pennylane_lightning/src/tests/Test_GateImplementations_CompareKernels.cpp +++ b/pennylane_lightning/src/tests/Test_GateImplementations_CompareKernels.cpp @@ -59,7 +59,6 @@ struct KernelsImplementingGateHelper { using Type = void; }; - /** * @brief Type list of kernels implementing the given gate operation. */ From ebbcf688efa0d209ffa9b02420b7a062c7dd3305 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Wed, 16 Mar 2022 09:48:44 -0400 Subject: [PATCH 77/94] Small fix --- pennylane_lightning/src/tests/Test_Util.cpp | 43 +++++++++---------- pennylane_lightning/src/util/ConstantUtil.hpp | 10 +++++ 2 files changed, 31 insertions(+), 22 deletions(-) diff --git a/pennylane_lightning/src/tests/Test_Util.cpp b/pennylane_lightning/src/tests/Test_Util.cpp index 0b4d4f7e9d..23b386b3e6 100644 --- a/pennylane_lightning/src/tests/Test_Util.cpp +++ b/pennylane_lightning/src/tests/Test_Util.cpp @@ -460,6 +460,27 @@ TEMPLATE_TEST_CASE("Utility math functions", "[Util][LinearAlgebra]", float, "the input right matrix")); } } + SECTION("SquaredNorm") { + { // for float + std::vector vec{0.0, 1.0, 3.0, 10.0}; + CHECK(Util::squaredNorm(vec) == Approx(110.0)); + } + + { // for double + std::vector vec{0.0, 1.0, 3.0, 10.0}; + CHECK(Util::squaredNorm(vec) == Approx(110.0)); + } + + { // for complex + std::vector> vec{{0.0, 1.0}, {3.0, 10.0}}; + CHECK(Util::squaredNorm(vec) == Approx(110.0)); + } + + { // for complex + std::vector> vec{{0.0, 1.0}, {3.0, 10.0}}; + CHECK(Util::squaredNorm(vec) == Approx(110.0)); + } + } } /** @@ -568,28 +589,6 @@ TEST_CASE("Utility bit operations", "[Util][BitUtil]") { } } } - - 
SECTION("SquaredNorm") { - { // for float - std::vector vec{0.0, 1.0, 3.0, 10.0}; - CHECK(Util::squaredNorm(vec) == Approx(110.0)); - } - - { // for double - std::vector vec{0.0, 1.0, 3.0, 10.0}; - CHECK(Util::squaredNorm(vec) == Approx(110.0)); - } - - { // for complex - std::vector> vec{{0.0, 1.0}, {3.0, 10.0}}; - CHECK(Util::squaredNorm(vec) == Approx(110.0)); - } - - { // for complex - std::vector> vec{{0.0, 1.0}, {3.0, 10.0}}; - CHECK(Util::squaredNorm(vec) == Approx(110.0)); - } - } } TEST_CASE("Utility array and tuples", "[Util]") { diff --git a/pennylane_lightning/src/util/ConstantUtil.hpp b/pennylane_lightning/src/util/ConstantUtil.hpp index d3995e7642..208ab30a28 100644 --- a/pennylane_lightning/src/util/ConstantUtil.hpp +++ b/pennylane_lightning/src/util/ConstantUtil.hpp @@ -200,6 +200,8 @@ reverse_pairs_helper(const std::array, size> &arr, * @tparam T Type of first elements * @tparam U Type of second elements * @tparam size Size of the array + * @param arr Array to reverse + * @return reversed array */ template constexpr auto reverse_pairs(const std::array, size> &arr) @@ -208,6 +210,14 @@ constexpr auto reverse_pairs(const std::array, size> &arr) std::make_index_sequence{}); } +/** + * @brief Constexpr function that check whether the given value is a power of 2. + * + * Can be merged with isPerfectPowerOf2 in C++20 using constexpr std::popcount. 
+ * + * @param value Value to check + * @return True when the given value is a power of 2 + */ constexpr auto constIsPerfectPowerOf2(size_t value) -> bool { while ((value & 1U) == 0) { value >>= 1U; From 97bc9b90f5d0d6b445451a1deea95d2b2c99a4cb Mon Sep 17 00:00:00 2001 From: Dev version update bot Date: Wed, 16 Mar 2022 13:52:24 +0000 Subject: [PATCH 78/94] Auto update version --- pennylane_lightning/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pennylane_lightning/_version.py b/pennylane_lightning/_version.py index 3bbb514373..8bad23fea1 100644 --- a/pennylane_lightning/_version.py +++ b/pennylane_lightning/_version.py @@ -16,4 +16,4 @@ Version number (major.minor.patch[-label]) """ -__version__ = "0.23.0-dev1" +__version__ = "0.23.0-dev2" From f9ec2af2ad327b57d7bfe4f887362d786c6f5a05 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Wed, 16 Mar 2022 09:55:52 -0400 Subject: [PATCH 79/94] Some more fix --- pennylane_lightning/src/util/RuntimeInfo.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pennylane_lightning/src/util/RuntimeInfo.hpp b/pennylane_lightning/src/util/RuntimeInfo.hpp index 2286009349..933bcf0642 100644 --- a/pennylane_lightning/src/util/RuntimeInfo.hpp +++ b/pennylane_lightning/src/util/RuntimeInfo.hpp @@ -27,10 +27,10 @@ class RuntimeInfo { struct InternalRuntimeInfo { InternalRuntimeInfo(); - std::bitset<32> f_1_ecx; - std::bitset<32> f_1_edx; - std::bitset<32> f_7_ebx; - std::bitset<32> f_7_ecx; + std::bitset<32> f_1_ecx{}; + std::bitset<32> f_1_edx{}; + std::bitset<32> f_7_ebx{}; + std::bitset<32> f_7_ecx{}; }; static const inline InternalRuntimeInfo internal_runtime_info_; From 865557e7c8293f8bb9e09869a4a1fac418795174 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Wed, 16 Mar 2022 09:58:00 -0400 Subject: [PATCH 80/94] Trigger CI From 86531d5e2f1d60785fcfa99727f2fe6394896981 Mon Sep 17 00:00:00 2001 From: Dev version update bot Date: Thu, 17 Mar 2022 17:18:14 +0000 Subject: 
[PATCH 81/94] Auto update version --- pennylane_lightning/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pennylane_lightning/_version.py b/pennylane_lightning/_version.py index 8bad23fea1..97154f70b9 100644 --- a/pennylane_lightning/_version.py +++ b/pennylane_lightning/_version.py @@ -16,4 +16,4 @@ Version number (major.minor.patch[-label]) """ -__version__ = "0.23.0-dev2" +__version__ = "0.23.0-dev3" From cf654a79710c120afbf34485c09a48b0797cd374 Mon Sep 17 00:00:00 2001 From: Dev version update bot Date: Thu, 17 Mar 2022 20:39:45 +0000 Subject: [PATCH 82/94] Auto update version --- pennylane_lightning/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pennylane_lightning/_version.py b/pennylane_lightning/_version.py index e4d2b0b628..b1866b428a 100644 --- a/pennylane_lightning/_version.py +++ b/pennylane_lightning/_version.py @@ -16,4 +16,4 @@ Version number (major.minor.patch[-label]) """ -__version__ = "0.23.0-dev4" +__version__ = "0.23.0-dev5" From b341f8cfa4af8a703fb56556d3c6308388dbb8e2 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Thu, 17 Mar 2022 16:41:32 -0400 Subject: [PATCH 83/94] Trigger CI From ac985c6fd3192cb5bfeb564bee8c063672fc115d Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Thu, 17 Mar 2022 17:26:24 -0400 Subject: [PATCH 84/94] Trigger CI From 00f4dc7d739ac201b1c4ceb1d65da9d0707ee033 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Thu, 17 Mar 2022 20:36:39 -0400 Subject: [PATCH 85/94] Trigger CI From a46a41fb9455b8750b319d694f38e81a9c97f69e Mon Sep 17 00:00:00 2001 From: Dev version update bot Date: Fri, 18 Mar 2022 00:36:57 +0000 Subject: [PATCH 86/94] Auto update version --- pennylane_lightning/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pennylane_lightning/_version.py b/pennylane_lightning/_version.py index b1866b428a..01c4a6e800 100644 --- a/pennylane_lightning/_version.py +++ b/pennylane_lightning/_version.py @@ -16,4 +16,4 @@ Version 
number (major.minor.patch[-label]) """ -__version__ = "0.23.0-dev5" +__version__ = "0.23.0-dev6" From 3bcba89b518f6be45682369274f9d2f1369eead7 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Sat, 19 Mar 2022 19:33:28 -0400 Subject: [PATCH 87/94] Fix doc --- pennylane_lightning/src/simulator/DynamicDispatcher.hpp | 2 ++ pennylane_lightning/src/util/TypeList.hpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/pennylane_lightning/src/simulator/DynamicDispatcher.hpp b/pennylane_lightning/src/simulator/DynamicDispatcher.hpp index 469873751f..4622676387 100644 --- a/pennylane_lightning/src/simulator/DynamicDispatcher.hpp +++ b/pennylane_lightning/src/simulator/DynamicDispatcher.hpp @@ -59,6 +59,7 @@ namespace Pennylane { */ template struct registerBeforeMain; +/// @cond DEV template <> struct registerBeforeMain { const static inline int dummy = Internal::registerAllAvailableKernels(); @@ -68,6 +69,7 @@ template <> struct registerBeforeMain { const static inline int dummy = Internal::registerAllAvailableKernels(); }; +/// @endcond /** * @brief DynamicDispatcher class diff --git a/pennylane_lightning/src/util/TypeList.hpp b/pennylane_lightning/src/util/TypeList.hpp index a53c3cbd5d..d87c3c540a 100644 --- a/pennylane_lightning/src/util/TypeList.hpp +++ b/pennylane_lightning/src/util/TypeList.hpp @@ -27,6 +27,7 @@ template struct TypeNode { using Type = T; using Next = TypeNode; }; +///@cond DEV template struct TypeNode { using Type = T; using Next = void; @@ -35,6 +36,7 @@ template struct TypeNode { using Type = T; using Next = void; }; +///@endcond /** * @brief Define type list From d358a3c6ea30c6b8d44e271a6be718f476917ab1 Mon Sep 17 00:00:00 2001 From: Dev version update bot Date: Tue, 22 Mar 2022 23:33:35 +0000 Subject: [PATCH 88/94] Auto update version --- pennylane_lightning/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pennylane_lightning/_version.py b/pennylane_lightning/_version.py index 4091fc7c62..adb0d6145d 100644 --- 
a/pennylane_lightning/_version.py +++ b/pennylane_lightning/_version.py @@ -16,4 +16,4 @@ Version number (major.minor.patch[-label]) """ -__version__ = "0.23.0-dev11" +__version__ = "0.23.0-dev12" From 80243581545caa80824e9b4e502b7d9c072fa185 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Tue, 22 Mar 2022 21:54:11 -0400 Subject: [PATCH 89/94] Format --- .../src/algorithms/AdjointDiff.hpp | 16 +++++++++------- .../Test_GateImplementations_CompareKernels.cpp | 6 +++--- .../tests/Test_GateImplementations_Inverse.cpp | 3 ++- pennylane_lightning/src/util/ConstantUtil.hpp | 2 +- pennylane_lightning/src/util/LinearAlgebra.hpp | 1 - pennylane_lightning/src/util/TypeTraits.hpp | 2 +- 6 files changed, 16 insertions(+), 14 deletions(-) diff --git a/pennylane_lightning/src/algorithms/AdjointDiff.hpp b/pennylane_lightning/src/algorithms/AdjointDiff.hpp index 92c8a8d78c..62e5946f4d 100644 --- a/pennylane_lightning/src/algorithms/AdjointDiff.hpp +++ b/pennylane_lightning/src/algorithms/AdjointDiff.hpp @@ -157,9 +157,10 @@ template class AdjointJacobian { * @param reference_state Reference statevector * @param observables Vector of observables to apply to each statevector. */ - inline void applyObservables(std::vector> &states, - const StateVectorManagedCPU &reference_state, - const std::vector> &observables) { + inline void + applyObservables(std::vector> &states, + const StateVectorManagedCPU &reference_state, + const std::vector> &observables) { // clang-format off // Globally scoped exception value to be captured within OpenMP block. // See the following for OpenMP design decisions: @@ -207,9 +208,9 @@ template class AdjointJacobian { * @param op_idx Index of given operation within operations list to take * adjoint of. 
*/ - inline void applyOperationsAdj(std::vector> &states, - const OpsData &operations, - size_t op_idx) { + inline void + applyOperationsAdj(std::vector> &states, + const OpsData &operations, size_t op_idx) { // clang-format off // Globally scoped exception value to be captured within OpenMP block. // See the following for OpenMP design decisions: @@ -333,7 +334,8 @@ template class AdjointJacobian { num_param_ops - 1; // total number of parametric ops // Create $U_{1:p}\vert \lambda \rangle$ - StateVectorManagedCPU lambda(jd.getPtrStateVec(), jd.getSizeStateVec()); + StateVectorManagedCPU lambda(jd.getPtrStateVec(), + jd.getSizeStateVec()); // Apply given operations to statevector if requested if (apply_operations) { diff --git a/pennylane_lightning/src/tests/Test_GateImplementations_CompareKernels.cpp b/pennylane_lightning/src/tests/Test_GateImplementations_CompareKernels.cpp index 55ac7f975e..1470393e18 100644 --- a/pennylane_lightning/src/tests/Test_GateImplementations_CompareKernels.cpp +++ b/pennylane_lightning/src/tests/Test_GateImplementations_CompareKernels.cpp @@ -79,9 +79,9 @@ template struct KernelsImplementingGate { */ template -auto applyGate(std::vector, Alloc> ini, size_t num_qubits, - const std::vector &wires, bool inverse, - const std::vector ¶ms) +auto applyGate(std::vector, Alloc> ini, + size_t num_qubits, const std::vector &wires, + bool inverse, const std::vector ¶ms) -> std::vector, Alloc> { callGateOps(GateOpToMemberFuncPtr::value, diff --git a/pennylane_lightning/src/tests/Test_GateImplementations_Inverse.cpp b/pennylane_lightning/src/tests/Test_GateImplementations_Inverse.cpp index 20bc9784ce..876b67f56e 100644 --- a/pennylane_lightning/src/tests/Test_GateImplementations_Inverse.cpp +++ b/pennylane_lightning/src/tests/Test_GateImplementations_Inverse.cpp @@ -26,7 +26,8 @@ using namespace Pennylane::Gates; template void testInverseKernelGate(RandomEngine &re, size_t num_qubits) { - constexpr auto gate_name = 
Util::static_lookup(Constant::gate_names); + constexpr auto gate_name = + Util::static_lookup(Constant::gate_names); DYNAMIC_SECTION("Test inverse of " << gate_name << " for kernel " << GateImplementation::name) { const auto ini_st = createRandomState(re, num_qubits); diff --git a/pennylane_lightning/src/util/ConstantUtil.hpp b/pennylane_lightning/src/util/ConstantUtil.hpp index 6a7e1490c2..8de2972ea8 100644 --- a/pennylane_lightning/src/util/ConstantUtil.hpp +++ b/pennylane_lightning/src/util/ConstantUtil.hpp @@ -17,8 +17,8 @@ */ #pragma once -#include "Util.hpp" #include "TypeTraits.hpp" +#include "Util.hpp" #include #include diff --git a/pennylane_lightning/src/util/LinearAlgebra.hpp b/pennylane_lightning/src/util/LinearAlgebra.hpp index 4af7701100..b85114caab 100644 --- a/pennylane_lightning/src/util/LinearAlgebra.hpp +++ b/pennylane_lightning/src/util/LinearAlgebra.hpp @@ -27,7 +27,6 @@ #include #include - /// @cond DEV #if __has_include() && defined _ENABLE_BLAS #include diff --git a/pennylane_lightning/src/util/TypeTraits.hpp b/pennylane_lightning/src/util/TypeTraits.hpp index b979ab6742..cc10f3ef11 100644 --- a/pennylane_lightning/src/util/TypeTraits.hpp +++ b/pennylane_lightning/src/util/TypeTraits.hpp @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
/** - * @file + * @file * Contains type traits */ #pragma once From 04897a0139b6955c1fa458bd5ad839342b762603 Mon Sep 17 00:00:00 2001 From: Dev version update bot Date: Wed, 23 Mar 2022 14:12:25 +0000 Subject: [PATCH 90/94] Auto update version --- pennylane_lightning/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pennylane_lightning/_version.py b/pennylane_lightning/_version.py index adb0d6145d..fc586eba69 100644 --- a/pennylane_lightning/_version.py +++ b/pennylane_lightning/_version.py @@ -16,4 +16,4 @@ Version number (major.minor.patch[-label]) """ -__version__ = "0.23.0-dev12" +__version__ = "0.23.0-dev13" From 95284810107d08f437be0f1f9a090ac707a8b71a Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Wed, 23 Mar 2022 14:54:57 -0400 Subject: [PATCH 91/94] Trigger CI From 6e35950027d81b45c2b351f89ea26ad544b6e14a Mon Sep 17 00:00:00 2001 From: Dev version update bot Date: Sat, 7 May 2022 18:52:26 +0000 Subject: [PATCH 92/94] Auto update version --- pennylane_lightning/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pennylane_lightning/_version.py b/pennylane_lightning/_version.py index d1a7ed1d64..018b2a730b 100644 --- a/pennylane_lightning/_version.py +++ b/pennylane_lightning/_version.py @@ -16,4 +16,4 @@ Version number (major.minor.patch[-label]) """ -__version__ = "0.24.0-dev5" +__version__ = "0.24.0-dev6" From d20339cbe2a2c00412e0875e8506906f20737cb8 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Sat, 7 May 2022 14:56:38 -0400 Subject: [PATCH 93/94] Remove files --- doc/conf.py | 1 + .../src/examples/benchmark_gate.cpp | 205 ------------------ .../src/examples/benchmark_generator.cpp | 202 ----------------- .../src/examples/benchmark_matrix.cpp | 143 ------------ .../src/examples/run_benchmark.py | 153 ------------- 5 files changed, 1 insertion(+), 703 deletions(-) delete mode 100644 pennylane_lightning/src/examples/benchmark_gate.cpp delete mode 100644 
pennylane_lightning/src/examples/benchmark_generator.cpp delete mode 100644 pennylane_lightning/src/examples/benchmark_matrix.cpp delete mode 100755 pennylane_lightning/src/examples/run_benchmark.py diff --git a/doc/conf.py b/doc/conf.py index 2da0d0f85d..cc2249239e 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -31,6 +31,7 @@ CPP_SOURCE_DIR = PROJECT_SOURCE_DIR.joinpath("pennylane_lightning/src") CPP_EXCLUDE_DIRS = ["examples", "tests", "benchmarks"] # relative to CPP_SOURCE_DIR + def obtain_cpp_files(): script_path = PROJECT_SOURCE_DIR.joinpath("bin/cpp-files") diff --git a/pennylane_lightning/src/examples/benchmark_gate.cpp b/pennylane_lightning/src/examples/benchmark_gate.cpp deleted file mode 100644 index a20b92d4b9..0000000000 --- a/pennylane_lightning/src/examples/benchmark_gate.cpp +++ /dev/null @@ -1,205 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - -#include "Constant.hpp" -#include "ExampleUtil.hpp" -#include "StateVectorManagedCPU.hpp" - -#ifdef USE_SINGLE_PRECISION -using PrecisionT = float; -#pragma message "Using single precision" -#else -using PrecisionT = double; -#endif - -using namespace Pennylane; -using Util::operator<<; - -struct GateDesc { - std::string name; - std::vector wires; - bool inverse; - std::vector params; - - template - GateDesc(Arg0 &&arg0, Arg1 &&arg1, Arg2 &&arg2, Arg3 &&arg3) - : name{std::forward(arg0)}, wires{std::forward(arg1)}, - inverse{std::forward(arg2)}, params{std::forward(arg3)} {} -}; - -std::ostream &operator<<(std::ostream &os, GateDesc &desc) { - os << desc.name << ", " << desc.wires << "," << desc.inverse << "," - << desc.params << std::endl; - return os; -} - -template -auto generateGateSequence(RandomEngine &re, const std::string &gate_name, - const size_t num_reps, const size_t num_qubits, - const size_t num_wires_for_multi_qubit) - -> std::vector { - using namespace Gates::Constant; - using Gates::GateOperation; - - const GateOperation gate_op = 
Util::lookup(Util::reverse_pairs(gate_names), - std::string_view(gate_name)); - const size_t num_wires = [=]() { - if (Util::array_has_elt(multi_qubit_gates, gate_op)) { - // if multi qubit gate - return num_wires_for_multi_qubit; - } - return Util::lookup(gate_wires, gate_op); - }(); - const size_t num_params = Util::lookup(gate_num_params, gate_op); - - std::vector gate_seq; - std::uniform_int_distribution inverse_dist(0, 1); - std::uniform_real_distribution param_dist(0.0, 2 * M_PI); - - for (uint32_t k = 0; k < num_reps; k++) { - std::vector params; - params.reserve(num_params); - - bool inverse = static_cast(inverse_dist(re)); - auto wires = generateNeighboringWires(re, num_qubits, num_wires); - - for (size_t idx = 0; idx < num_params; idx++) { - params.emplace_back(param_dist(re)); - } - - gate_seq.emplace_back(gate_name, std::move(wires), inverse, - std::move(params)); - } - return gate_seq; -} - -double benchmarkGate(Gates::KernelType kernel, const size_t num_qubits, - const std::vector &gate_seq) { - // Run benchmark. Total num_reps number of gates is used. 
- StateVectorManagedCPU svdat{num_qubits}; - - std::chrono::time_point t_start = - std::chrono::high_resolution_clock::now(); - for (const auto &gate : gate_seq) { - svdat.applyOperation(kernel, gate.name, gate.wires, gate.inverse, - gate.params); - } - std::chrono::time_point t_end = - std::chrono::high_resolution_clock::now(); - - return std::chrono::duration(t_end - t_start).count(); -} - -template -double runBenchmarkGate(RandomEngine &re, Gates::KernelType kernel, - const std::string &gate_name, size_t num_reps, - size_t num_qubits, size_t num_wires_for_multi_qubit) { - auto gate_seq = generateGateSequence(re, gate_name, num_reps, num_qubits, - num_wires_for_multi_qubit); - - // Log generated sequence if LOG is turned on - const char *env_p = std::getenv("LOG"); - try { - if (env_p != nullptr && std::stoi(env_p) != 0) { - for (const auto &gate : gate_seq) { - std::cerr << gate.name << ", " << gate.wires << "," - << gate.inverse << "," << gate.params << std::endl; - } - } - } catch (std::exception &e) { - // Just do not print log - } - - return benchmarkGate(kernel, num_qubits, gate_seq); -} - -/** - * @brief Benchmark Pennylane-Lightning for a given generator - * - * @param argc Number of arguments - * @param argv Command line arguments - * @return Returns 0 is completed successfully - */ -int main(int argc, char *argv[]) { - using namespace Pennylane::Gates; - // Handle input - if (argc != 5 && argc != 6) { // NOLINT(readability-magic-numbers) - std::cerr - << "Wrong number of inputs. User provided " << argc - 1 - << " inputs. 
\n" - << "Usage: " + std::string(argv[0]) + - " num_reps num_qubits kernel [generator|gate] [num_wires]\n" - "Examples: \n" - << "\t" << argv[0] << " 1000 10 PI GeneratorCRX\n" - << "\t" << argv[0] << " 1000 10 LM CRX\n" - << "\t" << argv[0] << " 1000 10 LM MutliRZ 3\n"; - return -1; - } - - size_t num_reps; - size_t num_qubits; - - try { - num_reps = std::stoi(argv[1]); - num_qubits = std::stoi(argv[2]); - } catch (std::exception &e) { - std::cerr << "Arguments num_reps and num_qubits must be integers." - << std::endl; - return -1; - } - - std::string_view kernel_name = argv[3]; - KernelType kernel = string_to_kernel(kernel_name); - if (kernel == KernelType::None) { - std::cerr << "Kernel " << kernel_name << " is unknown." << std::endl; - return 1; - } - - std::string_view gate_name = argv[4]; - if (!Util::array_has_elt(Util::second_elts_of(Constant::gate_names), - gate_name)) { - std::cerr << "Unknown gate name " << gate_name << " is provided" - << std::endl; - return 1; - } - - Gates::GateOperation gate_op = - Util::lookup(Util::reverse_pairs(Constant::gate_names), gate_name); - - size_t num_wires_for_multi_qubit = 0; - if (Util::array_has_elt(Constant::multi_qubit_gates, gate_op)) { - // User provided a multi-qubit gates - if (argc != 6) { // NOLINT(readability-magic-numbers) - std::cerr << "One should provide the number of wires when using " - "multi qubit gates." 
- << std::endl; - return 1; - } - - try { - // NOLINTNEXTLINE(readability-magic-numbers) - num_wires_for_multi_qubit = std::stoi(argv[5]); - } catch (std::exception &e) { - std::cerr << "Number of wires must be an integer" << std::endl; - return 1; - } - } - - std::random_device rd; - std::mt19937 re(rd()); - - double walltime = - runBenchmarkGate(re, kernel, std::string(gate_name), num_reps, - num_qubits, num_wires_for_multi_qubit); - - // Output walltime in csv format (Num Qubits, Time (milliseconds)) - std::cout << num_qubits << ", " << walltime / static_cast(num_reps) - << std::endl; - return 0; -} diff --git a/pennylane_lightning/src/examples/benchmark_generator.cpp b/pennylane_lightning/src/examples/benchmark_generator.cpp deleted file mode 100644 index c1ea726ec3..0000000000 --- a/pennylane_lightning/src/examples/benchmark_generator.cpp +++ /dev/null @@ -1,202 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - -#include "Constant.hpp" -#include "DynamicDispatcher.hpp" -#include "ExampleUtil.hpp" -#include "StateVectorManagedCPU.hpp" - -#ifdef USE_SINGLE_PRECISION -using PrecisionT = float; -#pragma message "Using single precision" -#else -using PrecisionT = double; -#endif - -using namespace Pennylane; -using namespace Pennylane::Gates; -using namespace Pennylane::Util; - -auto generatorOp(const std::string_view &name) -> Gates::GeneratorOperation { - auto &dispatcher = DynamicDispatcher::getInstance(); - return dispatcher.strToGeneratorOp(std::string(name)); -} - -struct GeneratorDesc { - std::string name; - std::vector wires; - bool inverse; - - template - GeneratorDesc(Arg0 &&arg0, Arg1 &&arg1, Arg2 &&arg2) - : name{std::forward(arg0)}, wires{std::forward(arg1)}, - inverse{std::forward(arg2)} {} -}; - -std::ostream &operator<<(std::ostream &os, GeneratorDesc &desc) { - os << desc.name << ", " << desc.wires << "," << desc.inverse << std::endl; - return os; -} - -template -auto generateGeneratorSequence(RandomEngine 
&re, - const GeneratorOperation gntr_op, - const size_t num_reps, const size_t num_qubits, - const size_t num_wires_for_multi_qubit) - -> std::vector { - namespace Constant = Gates::Constant; - using Gates::GeneratorOperation; - - const auto gntr_name = - Util::lookup(Constant::generator_names, gntr_op).substr(9); - - const size_t num_wires = [=]() { - if (Util::array_has_elt(Constant::multi_qubit_generators, gntr_op)) { - // if multi qubit gate - return num_wires_for_multi_qubit; - } - return Util::lookup(Constant::generator_wires, gntr_op); - }(); - - std::vector gntr_seq; - std::uniform_int_distribution inverse_dist(0, 1); - - for (uint32_t k = 0; k < num_reps; k++) { - bool inverse = static_cast(inverse_dist(re)); - auto wires = generateNeighboringWires(re, num_qubits, num_wires); - - gntr_seq.emplace_back(gntr_name, std::move(wires), inverse); - } - return gntr_seq; -} - -double benchmarkGenerator(KernelType kernel, const size_t num_qubits, - const std::vector &gntr_seq) { - // Run benchmark. Total num_reps number of gates is used. 
- StateVectorManagedCPU svdat{num_qubits}; - - std::chrono::time_point t_start = - std::chrono::high_resolution_clock::now(); - for (const auto &gntr : gntr_seq) { - [[maybe_unused]] PrecisionT scale = - svdat.applyGenerator(kernel, gntr.name, gntr.wires, gntr.inverse); - } - std::chrono::time_point t_end = - std::chrono::high_resolution_clock::now(); - - return std::chrono::duration(t_end - t_start).count(); -} - -template -double runBenchmarkGenerator(RandomEngine &re, KernelType kernel, - const GeneratorOperation gntr_op, size_t num_reps, - size_t num_qubits, - size_t num_wires_for_multi_qubit) { - auto gntr_seq = generateGeneratorSequence(re, gntr_op, num_reps, num_qubits, - num_wires_for_multi_qubit); - - // Log generated sequence if LOG is turned on - const char *env_p = std::getenv("LOG"); - try { - if (env_p != nullptr && std::stoi(env_p) != 0) { - for (const auto &gntr : gntr_seq) { - std::cerr << gntr.name << ", " << gntr.wires << "," - << gntr.inverse << std::endl; - } - } - } catch (std::exception &e) { - // Just do not print log - } - - return benchmarkGenerator(kernel, num_qubits, gntr_seq); -} - -/** - * @brief Benchmark Pennylane-Lightning for a given generator - * - * @param argc Number of arguments - * @param argv Command line arguments - * @return Returns 0 is completed successfully - */ -int main(int argc, char *argv[]) { - namespace Constant = Gates::Constant; - // Handle input - if (argc != 5 && argc != 6) { // NOLINT(readability-magic-numbers) - std::cerr - << "Wrong number of inputs. User provided " << argc - 1 - << " inputs. 
\n" - << "Usage: " + std::string(argv[0]) + - " num_reps num_qubits kernel [generator|gate] [num_wires]\n" - "Examples: \n" - << "\t" << argv[0] << " 1000 10 PI GeneratorCRX\n" - << "\t" << argv[0] << " 1000 10 LM CRX\n" - << "\t" << argv[0] << " 1000 10 LM MutliRZ 3\n"; - return -1; - } - - size_t num_reps; - size_t num_qubits; - - try { - num_reps = std::stoi(argv[1]); - num_qubits = std::stoi(argv[2]); - } catch (std::exception &e) { - std::cerr << "Arguments num_reps and num_qubits must be integers." - << std::endl; - return -1; - } - - std::string_view kernel_name = argv[3]; - KernelType kernel = string_to_kernel(kernel_name); - if (kernel == KernelType::None) { - std::cerr << "Kernel " << kernel_name << " is unknown." << std::endl; - return 1; - } - - std::string_view gntr_name = argv[4]; - Gates::GeneratorOperation gntr_op; - - try { - gntr_op = generatorOp(gntr_name); - } catch (std::exception &e) { - std::cout << "Unknown generator " + std::string(gntr_name) + " provided" - << std::endl; - return 1; - } - - size_t num_wires_for_multi_qubit = 0; - if (Util::array_has_elt(Constant::multi_qubit_generators, gntr_op)) { - // User provided a multi-qubit gates - if (argc != 6) { // NOLINT(readability-magic-numbers) - std::cerr << "One should provide the number of wires when using " - "multi qubit generators." 
- << std::endl; - return 1; - } - - try { - // NOLINTNEXTLINE(readability-magic-numbers) - num_wires_for_multi_qubit = std::stoi(argv[5]); - } catch (std::exception &e) { - std::cerr << "Number of wires must be an integer" << std::endl; - return 1; - } - } - - std::random_device rd; - std::mt19937 re(rd()); - - double walltime = runBenchmarkGenerator( - re, kernel, gntr_op, num_reps, num_qubits, num_wires_for_multi_qubit); - - // Output walltime in csv format (Num Qubits, Time (milliseconds)) - std::cout << num_qubits << ", " << walltime / static_cast(num_reps) - << std::endl; - return 0; -} diff --git a/pennylane_lightning/src/examples/benchmark_matrix.cpp b/pennylane_lightning/src/examples/benchmark_matrix.cpp deleted file mode 100644 index 26d6ec45df..0000000000 --- a/pennylane_lightning/src/examples/benchmark_matrix.cpp +++ /dev/null @@ -1,143 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - -#include "Constant.hpp" -#include "ExampleUtil.hpp" -#include "LinearAlgebra.hpp" -#include "StateVectorManagedCPU.hpp" - -#ifdef USE_SINGLE_PRECISION -using PrecisionT = float; -#pragma message "Using single precision" -#else -using PrecisionT = double; -#endif - -using namespace Pennylane; -using namespace Pennylane::Gates; -using namespace Pennylane::Util; - -struct MatOpDesc { - std::vector wires; - bool inverse; - std::vector> mat; - - template - MatOpDesc(Arg0 &&arg0, Arg1 &&arg1, Arg2 &&arg2) - : wires{std::forward(arg0)}, inverse{std::forward(arg1)}, - mat{std::forward(arg2)} {} -}; - -template -auto generateMatrixSequence(RandomEngine &re, const size_t num_reps, - const size_t num_qubits, const size_t num_wires) - -> std::vector { - std::vector matrix_seq; - matrix_seq.reserve(num_reps); - std::uniform_int_distribution inverse_dist(0, 1); - for (uint32_t k = 0; k < num_reps; k++) { - bool inverse = static_cast(inverse_dist(re)); - auto wires = generateNeighboringWires(re, num_qubits, num_wires); - - 
matrix_seq.emplace_back(std::move(wires), inverse, - Util::randomUnitary(re, num_wires)); - } - return matrix_seq; -} - -double benchmarkMatrix(KernelType kernel, const size_t num_qubits, - const std::vector &mat_seq) { - // Run benchmark. Total num_reps number of gates is used. - StateVectorManagedCPU svdat{num_qubits}; - - std::chrono::time_point t_start = - std::chrono::high_resolution_clock::now(); - for (const auto &mat_desc : mat_seq) { - svdat.applyMatrix(kernel, mat_desc.mat.data(), mat_desc.wires, - mat_desc.inverse); - } - std::chrono::time_point t_end = - std::chrono::high_resolution_clock::now(); - - return std::chrono::duration(t_end - t_start).count(); -} - -template -double runBenchmarkMatrix(RandomEngine &re, KernelType kernel, size_t num_reps, - size_t num_qubits, size_t num_wires) { - auto mat_seq = generateMatrixSequence(re, num_reps, num_qubits, num_wires); - - // Log generated sequence if LOG is turned on - const char *env_p = std::getenv("LOG"); - try { - if (env_p != nullptr && std::stoi(env_p) != 0) { - for (const auto &mat_desc : mat_seq) { - std::cerr << mat_desc.wires << ", " << mat_desc.inverse << ", " - << mat_desc.mat << std::endl; - } - } - } catch (std::exception &e) { - // Just do not print log - } - - return benchmarkMatrix(kernel, num_qubits, mat_seq); -} - -/** - * @brief Benchmark Pennylane-Lightning for a given generator - * - * @param argc Number of arguments - * @param argv Command line arguments - * @return Returns 0 is completed successfully - */ -int main(int argc, char *argv[]) { - namespace Constant = Gates::Constant; - // Handle input - if (argc != 5) { // NOLINT(readability-magic-numbers) - std::cerr << "Wrong number of inputs. User provided " << argc - 1 - << " inputs. 
\n" - << "Usage: " + std::string(argv[0]) + - " num_reps num_qubits kernel num_wires\n" - "Examples: \n" - << "\t" << argv[0] << " 1000 10 PI 4\n"; - return -1; - } - - size_t num_reps; - size_t num_qubits; - size_t num_wires; - - try { - num_reps = std::stoi(argv[1]); - num_qubits = std::stoi(argv[2]); - num_wires = std::stoi(argv[4]); - } catch (std::exception &e) { - std::cerr << "Arguments num_reps and num_qubits must be integers." - << std::endl; - return -1; - } - - std::string_view kernel_name = argv[3]; - KernelType kernel = string_to_kernel(kernel_name); - if (kernel == KernelType::None) { - std::cerr << "Kernel " << kernel_name << " is unknown." << std::endl; - return 1; - } - - std::random_device rd; - std::mt19937 re(rd()); - - double walltime = - runBenchmarkMatrix(re, kernel, num_reps, num_qubits, num_wires); - - // Output walltime in csv format (Num Qubits, Time (milliseconds)) - std::cout << num_qubits << ", " << walltime / static_cast(num_reps) - << std::endl; - return 0; -} diff --git a/pennylane_lightning/src/examples/run_benchmark.py b/pennylane_lightning/src/examples/run_benchmark.py deleted file mode 100755 index ae20d520b0..0000000000 --- a/pennylane_lightning/src/examples/run_benchmark.py +++ /dev/null @@ -1,153 +0,0 @@ -#!/usr/bin/env python3 -import subprocess -import argparse -import json -from pathlib import Path -from typing import final -import abc - -MIN_NUM_QUBITS = 8 -MAX_NUM_QUBITS = 24 -STEP_NUM_QUBITS = 2 -NUM_GATE_REPS = 1000 - - -class BenchmarkRunner: - def __init__(self, kernel, operation): - self.kernel = kernel - self.operation = operation - - @final - def benchmark(self, res_path): - result = [] - ext_info = self.external_info() - if ext_info: - result.append(ext_info) - try: - for num_qubit in range(MIN_NUM_QUBITS, MAX_NUM_QUBITS + 1, STEP_NUM_QUBITS): - cmd = self.command(num_qubit) - print(f"Run N={num_qubit}, {self.kernel}, {self.operation}") - output = subprocess.run([str(c) for c in cmd], capture_output=True, 
check=True) - time = output.stdout.decode("utf-8").strip().split(",")[1] - result.append({"N": num_qubit, "time": time}) - except subprocess.CalledProcessError as err: - print("Error from subprocess call. Message:") - print(err.stderr.decode("utf-8")) - except KeyboardInterrupt: - pass - - res_path = Path(res_path) - if not res_path.exists(): - res_path.mkdir(parents=True) - - with res_path.joinpath(self.filename()).open("w") as f: - json.dump(result, f, indent=4) - - @abc.abstractmethod - def command(self, num_qubits): - pass - - @abc.abstractmethod - def external_info(self): - pass - - @abc.abstractmethod - def filename(self): - pass - - -class MatrixBenchmarkRunner(BenchmarkRunner): - def __init__(self, kernel, operation, num_wires): - super().__init__(kernel, operation) - self.num_wires = num_wires - - def command(self, num_qubits): - return ["./benchmark_matrix", NUM_GATE_REPS, num_qubits, self.kernel, self.num_wires] - - def external_info(self): - return {"num_wires": self.num_wires} - - def filename(self): - return f"Matrix_{self.kernel}_{self.num_wires}.json" - - -class GateBenchmarkRunner(BenchmarkRunner): - def __init__(self, kernel, operation, num_wires=None): - super().__init__(kernel, operation) - self.num_wires = num_wires - - def command(self, num_qubits): - cmd = ["./benchmark_gate", NUM_GATE_REPS, num_qubits, self.kernel, self.operation] - if self.num_wires: - cmd.append(self.num_wires) - return cmd - - def external_info(self): - if self.num_wires: - return {"num_wires": self.num_wires} - return None - - def filename(self): - if self.num_wires: - return f"{self.operation}_{self.kernel}_{self.num_wires}.json" - return f"{self.operation}_{self.kernel}.json" - - -class GeneratorBenchmarkRunner(BenchmarkRunner): - def __init__(self, kernel, operation, num_wires=None): - super().__init__(kernel, operation) - self.num_wires = num_wires - - def command(self, num_qubits): - cmd = ["./benchmark_generator", NUM_GATE_REPS, num_qubits, self.kernel, 
self.operation[9:]] - if self.num_wires is not None: - cmd.append(self.num_wires) - return cmd - - def external_info(self): - if self.num_wires: - return {"num_wires": self.num_wires} - return None - - def filename(self): - if self.num_wires: - return f"{self.operation}_{self.kernel}_{self.num_wires}.json" - return f"{self.operation}_{self.kernel}.json" - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Run C++ benchmarks") - parser.add_argument("kernel", help="Kernel to benchmark") - parser.add_argument("operation", help="Operation to benchmark.") - - parser.add_argument( - "num_wires", - help="Number of wires (optional for multi-qubit operations).", - nargs="?", - default=None, - type=int, - ) - - args = parser.parse_args() - - compiler_info_file = "compiler_info.txt" - - try: - with open(compiler_info_file, "r") as f: - res_path = "res_" + f.readline().strip() - except OSError: - print("Encountered an error while opening '{}'".format(compiler_info_file)) - sys.exit(1) - - if args.operation == "Matrix": - if args.num_wires == 0: - raise ValueError( - "Parameter num_wires must be provided and larger than 0 for matrix benchmark." 
- ) - runner = MatrixBenchmarkRunner(args.kernel, args.operation, args.num_wires) - elif args.operation.startswith("Generator"): - runner = GeneratorBenchmarkRunner(args.kernel, args.operation, args.num_wires) - else: - runner = GateBenchmarkRunner(args.kernel, args.operation, args.num_wires) - - runner.benchmark(res_path) From 0eced01b9a75aaec285288a7a2bdf379dc6859a3 Mon Sep 17 00:00:00 2001 From: Chae-Yeun Park Date: Sat, 7 May 2022 16:55:17 -0400 Subject: [PATCH 94/94] Fix --- .../src/gates/OpToMemberFuncPtr.hpp | 2 +- .../cpu_kernels/GateImplementationsLM.hpp | 2 +- .../cpu_kernels/GateImplementationsPI.hpp | 3 +- .../src/simulator/DefaultKernels.hpp | 131 ------------- .../src/simulator/Measures.cpp | 3 +- .../src/simulator/StateVectorBase.hpp | 14 +- .../src/simulator/StateVectorManagedCPU.hpp | 2 +- .../src/simulator/StateVectorRawCPU.hpp | 2 +- .../src/tests/TestAvailableKernels.hpp | 1 - .../src/tests/TestConstant.hpp | 1 - .../src/tests/Test_StateVectorManaged.cpp | 174 ------------------ 11 files changed, 14 insertions(+), 321 deletions(-) delete mode 100644 pennylane_lightning/src/simulator/DefaultKernels.hpp delete mode 100644 pennylane_lightning/src/tests/Test_StateVectorManaged.cpp diff --git a/pennylane_lightning/src/gates/OpToMemberFuncPtr.hpp b/pennylane_lightning/src/gates/OpToMemberFuncPtr.hpp index d2ca7afc9b..04cf23248a 100644 --- a/pennylane_lightning/src/gates/OpToMemberFuncPtr.hpp +++ b/pennylane_lightning/src/gates/OpToMemberFuncPtr.hpp @@ -427,7 +427,7 @@ template using GeneratorFuncPtrT = typename Internal::GeneratorFuncPtr::Type; /** - * @brief Convinient type alias for MatrixfuncPtr. + * @brief Convenient type alias for MatrixfuncPtr. 
*/ template using MatrixFuncPtrT = typename Internal::MatrixFuncPtr::Type; diff --git a/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsLM.hpp b/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsLM.hpp index c45e62e9c4..adb1f287bd 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsLM.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsLM.hpp @@ -22,8 +22,8 @@ #include "GateOperation.hpp" #include "Gates.hpp" #include "KernelType.hpp" -#include "PauliGenerator.hpp" #include "LinearAlgebra.hpp" +#include "PauliGenerator.hpp" #include #include diff --git a/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsPI.hpp b/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsPI.hpp index b144d90b21..f94d7b12a2 100644 --- a/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsPI.hpp +++ b/pennylane_lightning/src/gates/cpu_kernels/GateImplementationsPI.hpp @@ -24,14 +24,13 @@ #endif /// @endcond -#include "PauliGenerator.hpp" - #include "BitUtil.hpp" #include "GateOperation.hpp" #include "GateUtil.hpp" #include "Gates.hpp" #include "KernelType.hpp" #include "LinearAlgebra.hpp" +#include "PauliGenerator.hpp" #include #include diff --git a/pennylane_lightning/src/simulator/DefaultKernels.hpp b/pennylane_lightning/src/simulator/DefaultKernels.hpp deleted file mode 100644 index 10421ee933..0000000000 --- a/pennylane_lightning/src/simulator/DefaultKernels.hpp +++ /dev/null @@ -1,131 +0,0 @@ -// Copyright 2022 Xanadu Quantum Technologies Inc. - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. -/** - * @file - * Defines default kernels for operations - */ -#pragma once - -#include "Constant.hpp" -#include "ConstantUtil.hpp" -#include "KernelType.hpp" - -namespace Pennylane { -/** - * - * @brief Define which kernel to use for each gate operation. - * - * @rst - * Check - * `this repository - * `_ to see - * the benchmark results for each gate - * @endrst - * - * This value is used for: - * 1. StateVector `apply##GATE_NAME` methods. The kernel function is statically - * binded to the given kernel and cannot be modified. - * 2. Default kernel functions for DynamicDispatcher. The kernel function is - * dynamically binded and can be changed using DynamicDispatcher singleton - * class. - * 3. For the Python binding. - */ -[[maybe_unused]] constexpr std::array default_kernel_for_gates = { - std::pair{Gates::GateOperation::Identity, Gates::KernelType::LM}, - std::pair{Gates::GateOperation::PauliX, Gates::KernelType::LM}, - std::pair{Gates::GateOperation::PauliY, Gates::KernelType::LM}, - std::pair{Gates::GateOperation::PauliZ, Gates::KernelType::LM}, - std::pair{Gates::GateOperation::Hadamard, Gates::KernelType::LM}, - std::pair{Gates::GateOperation::S, Gates::KernelType::LM}, - std::pair{Gates::GateOperation::T, Gates::KernelType::LM}, - std::pair{Gates::GateOperation::RX, Gates::KernelType::LM}, - std::pair{Gates::GateOperation::RY, Gates::KernelType::LM}, - std::pair{Gates::GateOperation::RZ, Gates::KernelType::LM}, - std::pair{Gates::GateOperation::PhaseShift, Gates::KernelType::LM}, - std::pair{Gates::GateOperation::Rot, Gates::KernelType::LM}, - std::pair{Gates::GateOperation::ControlledPhaseShift, - Gates::KernelType::PI}, - std::pair{Gates::GateOperation::CNOT, Gates::KernelType::LM}, - std::pair{Gates::GateOperation::CY, Gates::KernelType::LM}, - std::pair{Gates::GateOperation::CZ, Gates::KernelType::LM}, - std::pair{Gates::GateOperation::SWAP, 
Gates::KernelType::LM}, - std::pair{Gates::GateOperation::IsingXX, Gates::KernelType::LM}, - std::pair{Gates::GateOperation::IsingYY, Gates::KernelType::LM}, - std::pair{Gates::GateOperation::IsingZZ, Gates::KernelType::LM}, - std::pair{Gates::GateOperation::CRX, Gates::KernelType::LM}, - std::pair{Gates::GateOperation::CRY, Gates::KernelType::LM}, - std::pair{Gates::GateOperation::CRZ, Gates::KernelType::LM}, - std::pair{Gates::GateOperation::CRot, Gates::KernelType::LM}, - std::pair{Gates::GateOperation::Toffoli, Gates::KernelType::PI}, - std::pair{Gates::GateOperation::CSWAP, Gates::KernelType::PI}, - std::pair{Gates::GateOperation::MultiRZ, Gates::KernelType::LM}, -}; -/** - * @brief Define which kernel to use for each generator operation. - */ -[[maybe_unused]] constexpr std::array default_kernel_for_generators = { - std::pair{Gates::GeneratorOperation::PhaseShift, Gates::KernelType::PI}, - std::pair{Gates::GeneratorOperation::RX, Gates::KernelType::LM}, - std::pair{Gates::GeneratorOperation::RY, Gates::KernelType::LM}, - std::pair{Gates::GeneratorOperation::RZ, Gates::KernelType::LM}, - std::pair{Gates::GeneratorOperation::IsingXX, Gates::KernelType::LM}, - std::pair{Gates::GeneratorOperation::IsingYY, Gates::KernelType::LM}, - std::pair{Gates::GeneratorOperation::IsingZZ, Gates::KernelType::LM}, - std::pair{Gates::GeneratorOperation::CRX, Gates::KernelType::LM}, - std::pair{Gates::GeneratorOperation::CRY, Gates::KernelType::LM}, - std::pair{Gates::GeneratorOperation::CRZ, Gates::KernelType::LM}, - std::pair{Gates::GeneratorOperation::ControlledPhaseShift, - Gates::KernelType::LM}, - std::pair{Gates::GeneratorOperation::MultiRZ, Gates::KernelType::LM}, -}; - -/** - * @brief Define which kernel to use for each generator operation. 
- */ -[[maybe_unused]] constexpr std::array default_kernel_for_matrices = { - std::pair{Gates::MatrixOperation::SingleQubitOp, Gates::KernelType::LM}, - std::pair{Gates::MatrixOperation::TwoQubitOp, Gates::KernelType::LM}, - std::pair{Gates::MatrixOperation::MultiQubitOp, Gates::KernelType::PI}, -}; - -/** - * @brief Return default kernel for gate operation - * - * @param gate_op Gate operation - */ -constexpr auto getDefaultKernelForGate(Gates::GateOperation gate_op) - -> Gates::KernelType { - return Util::lookup(default_kernel_for_gates, gate_op); -} - -/** - * @brief Return default kernel for generator operation - * - * @param gntr_op Generator operation - */ -constexpr auto getDefaultKernelForGenerator(Gates::GeneratorOperation gntr_op) - -> Gates::KernelType { - return Util::lookup(default_kernel_for_generators, gntr_op); -} - -/** - * @brief Return default kernel for matrix operation - * - * @param mat_op Matrix operation - */ -constexpr auto getDefaultKernelForMatrix(Gates::MatrixOperation mat_op) - -> Gates::KernelType { - return Util::lookup(default_kernel_for_matrices, mat_op); -} -} // namespace Pennylane diff --git a/pennylane_lightning/src/simulator/Measures.cpp b/pennylane_lightning/src/simulator/Measures.cpp index 6e961f7fd5..ff04a033ab 100644 --- a/pennylane_lightning/src/simulator/Measures.cpp +++ b/pennylane_lightning/src/simulator/Measures.cpp @@ -16,4 +16,5 @@ // explicit instantiation template class Pennylane::Measures>; -template class Pennylane::Measures>; +template class Pennylane::Measures>; diff --git a/pennylane_lightning/src/simulator/StateVectorBase.hpp b/pennylane_lightning/src/simulator/StateVectorBase.hpp index 811174466d..6d2b0a3002 100644 --- a/pennylane_lightning/src/simulator/StateVectorBase.hpp +++ b/pennylane_lightning/src/simulator/StateVectorBase.hpp @@ -233,16 +233,16 @@ template class StateVectorBase { * inverted. 
*/ void applyOperations(const std::vector &ops, - const std::vector> &wires, - const std::vector &inverse) { + const std::vector> &ops_wires, + const std::vector &ops_inverse) { const size_t numOperations = ops.size(); - if (numOperations != wires.size()) { + if (numOperations != ops_wires.size()) { throw std::invalid_argument( "Invalid arguments: number of operations, wires, and " "parameters must all be equal"); } for (size_t i = 0; i < numOperations; i++) { - applyOperation(ops[i], wires[i], inverse[i], {}); + applyOperation(ops[i], ops_wires[i], ops_inverse[i], {}); } } @@ -275,9 +275,9 @@ template class StateVectorBase { bool adj = false) -> PrecisionT { auto *arr = getData(); const auto &dispatcher = DynamicDispatcher::getInstance(); - return dispatcher.applyGenerator( - getKernelForGenerator(dispatcher.strToGeneratorOp(opName)), arr, - num_qubits_, opName, wires, adj); + const auto gntr_op = dispatcher.strToGeneratorOp(opName); + return dispatcher.applyGenerator(getKernelForGenerator(gntr_op), arr, + num_qubits_, opName, wires, adj); } /** diff --git a/pennylane_lightning/src/simulator/StateVectorManagedCPU.hpp b/pennylane_lightning/src/simulator/StateVectorManagedCPU.hpp index 35e93478e1..f5cd98aad0 100644 --- a/pennylane_lightning/src/simulator/StateVectorManagedCPU.hpp +++ b/pennylane_lightning/src/simulator/StateVectorManagedCPU.hpp @@ -49,7 +49,7 @@ class StateVectorManagedCPU * @param memory_model Memory model the statevector will use */ explicit StateVectorManagedCPU( - size_t num_qubits, Threading threading = bestThreading(), + size_t num_qubits, Threading threading = Threading::SingleThread, CPUMemoryModel memory_model = bestCPUMemoryModel()) : BaseType{num_qubits, threading, memory_model}, data_{Util::exp2(num_qubits), ComplexPrecisionT{0.0, 0.0}, diff --git a/pennylane_lightning/src/simulator/StateVectorRawCPU.hpp b/pennylane_lightning/src/simulator/StateVectorRawCPU.hpp index 92de97be20..eca8d9c694 100644 --- 
a/pennylane_lightning/src/simulator/StateVectorRawCPU.hpp +++ b/pennylane_lightning/src/simulator/StateVectorRawCPU.hpp @@ -65,7 +65,7 @@ class StateVectorRawCPU * @param threading Threading option the statevector to use */ StateVectorRawCPU(ComplexPrecisionT *data, size_t length, - Threading threading = bestThreading()) + Threading threading = Threading::SingleThread) : BaseType{Util::log2PerfectPower(length), threading, getMemoryModel(static_cast(data))}, data_{data}, length_(length) { diff --git a/pennylane_lightning/src/tests/TestAvailableKernels.hpp b/pennylane_lightning/src/tests/TestAvailableKernels.hpp index b11a567d72..1139abb961 100644 --- a/pennylane_lightning/src/tests/TestAvailableKernels.hpp +++ b/pennylane_lightning/src/tests/TestAvailableKernels.hpp @@ -1,6 +1,5 @@ #include "AvailableKernels.hpp" #include "Constant.hpp" -#include "DefaultKernels.hpp" #include "KernelType.hpp" #include "SelectKernel.hpp" #include "Util.hpp" diff --git a/pennylane_lightning/src/tests/TestConstant.hpp b/pennylane_lightning/src/tests/TestConstant.hpp index 7f6d3b67f9..478041b32a 100644 --- a/pennylane_lightning/src/tests/TestConstant.hpp +++ b/pennylane_lightning/src/tests/TestConstant.hpp @@ -1,6 +1,5 @@ #include "Constant.hpp" #include "ConstantUtil.hpp" -#include "DefaultKernels.hpp" #include "GateOperation.hpp" #include "Util.hpp" diff --git a/pennylane_lightning/src/tests/Test_StateVectorManaged.cpp b/pennylane_lightning/src/tests/Test_StateVectorManaged.cpp deleted file mode 100644 index fdbb30af06..0000000000 --- a/pennylane_lightning/src/tests/Test_StateVectorManaged.cpp +++ /dev/null @@ -1,174 +0,0 @@ -#include "DefaultKernels.hpp" -#include "LinearAlgebra.hpp" -#include "StateVectorManaged.hpp" -#include "StateVectorRaw.hpp" -#include "Util.hpp" -#include "cpu_kernels/GateImplementationsPI.hpp" - -#include "TestHelpers.hpp" -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace Pennylane; - 
-TEMPLATE_TEST_CASE("StateVectorManaged::StateVectorManaged", - "[StateVectorManaged]", float, double) { - using PrecisionT = TestType; - - SECTION("StateVectorManaged") { - REQUIRE(!std::is_constructible_v>); - } - SECTION("StateVectorManaged") { - REQUIRE(!std::is_constructible_v>); - } - SECTION("StateVectorManaged {size_t}") { - REQUIRE(std::is_constructible_v, size_t>); - const size_t num_qubits = 4; - StateVectorManaged sv(num_qubits); - - REQUIRE(sv.getNumQubits() == 4); - REQUIRE(sv.getLength() == 16); - REQUIRE(sv.getDataVector().size() == 16); - } - SECTION("StateVectorManaged {const StateVectorRaw&}") { - REQUIRE(std::is_constructible_v, - const StateVectorRaw &>); - } - SECTION( - "StateVectorManaged {const StateVectorManaged&}") { - REQUIRE(std::is_copy_constructible_v>); - } - SECTION("StateVectorManaged {StateVectorManaged&&}") { - REQUIRE(std::is_move_constructible_v>); - } -} - -TEMPLATE_TEST_CASE("StateVectorManaged::applyMatrix with std::vector", - "[StateVectorManaged]", float, double) { - using PrecisionT = TestType; - SECTION("Test wrong matrix size") { - std::vector> m(7, 0.0); - const size_t num_qubits = 4; - StateVectorManaged sv(num_qubits); - REQUIRE_THROWS_WITH( - sv.applyMatrix(m, {0, 1}), - Catch::Contains( - "The size of matrix does not match with the given")); - } - - SECTION("Test wrong number of wires") { - std::vector> m(8, 0.0); - const size_t num_qubits = 4; - StateVectorManaged sv(num_qubits); - REQUIRE_THROWS_WITH( - sv.applyMatrix(m, {0}), - Catch::Contains( - "The size of matrix does not match with the given")); - } -} - -TEMPLATE_TEST_CASE("StateVectorManaged::applyMatrix with a pointer", - "[StateVectorManaged]", float, double) { - using PrecisionT = TestType; - SECTION("Test wrong matrix") { - std::vector> m(8, 0.0); - const size_t num_qubits = 4; - StateVectorManaged sv(num_qubits); - REQUIRE_THROWS_WITH(sv.applyMatrix(m.data(), {}), - Catch::Contains("must be larger than 0")); - } - - SECTION("Test with different number 
of wires") { - std::default_random_engine re{1337}; - const size_t num_qubits = 5; - for (size_t num_wires = 1; num_wires < num_qubits; num_wires++) { - StateVectorManaged sv1(num_qubits); - StateVectorManaged sv2(num_qubits); - - std::vector wires(num_wires); - std::iota(wires.begin(), wires.end(), 0); - - const auto m = Util::randomUnitary(re, num_wires); - sv1.applyMatrix(m, wires); - Gates::GateImplementationsPI::applyMultiQubitOp( - sv2.getData(), num_qubits, m.data(), wires, false); - REQUIRE(sv1.getDataVector() == - approx(sv2.getDataVector()).margin(PrecisionT{1e-5})); - } - } -} - -TEMPLATE_TEST_CASE("StateVectorManaged::applyOperations", - "[StateVectorManaged]", float, double) { - using PrecisionT = TestType; - - std::mt19937 re{1337}; - - SECTION("Test invalid arguments without params") { - const size_t num_qubits = 4; - StateVectorManaged sv(num_qubits); - REQUIRE_THROWS_WITH( - sv.applyOperations({"PauliX", "PauliY"}, {{0}}, {false, false}), - Catch::Contains("must all be equal")); // invalid wires - REQUIRE_THROWS_WITH( - sv.applyOperations({"PauliX", "PauliY"}, {{0}, {1}}, {false}), - Catch::Contains("must all be equal")); // invalid inverse - } - - SECTION("applyOperations without params works as expected") { - const size_t num_qubits = 3; - StateVectorManaged sv1(num_qubits); - - sv1.updateData(createRandomState(re, num_qubits)); - StateVectorManaged sv2 = sv1; - - sv1.applyOperations({"PauliX", "PauliY"}, {{0}, {1}}, {false, false}); - - sv2.applyOperation("PauliX", {0}, false); - sv2.applyOperation("PauliY", {1}, false); - - REQUIRE(sv1.getDataVector() == approx(sv2.getDataVector())); - } - - SECTION("Test invalid arguments with params") { - const size_t num_qubits = 4; - StateVectorManaged sv(num_qubits); - REQUIRE_THROWS_WITH( - sv.applyOperations({"RX", "RY"}, {{0}}, {false, false}, - {{0.0}, {0.0}}), - Catch::Contains("must all be equal")); // invalid wires - REQUIRE_THROWS_WITH( - sv.applyOperations({"RX", "RY"}, {{0}, {1}}, {false}, - 
{{0.0}, {0.0}}), - Catch::Contains("must all be equal")); // invalid inverse - - REQUIRE_THROWS_WITH( - sv.applyOperations({"RX", "RY"}, {{0}, {1}}, {false, false}, - {{0.0}}), - Catch::Contains("must all be equal")); // invalid params - } - - SECTION("applyOperations with params works as expected") { - const size_t num_qubits = 3; - StateVectorManaged sv1(num_qubits); - - sv1.updateData(createRandomState(re, num_qubits)); - StateVectorManaged sv2 = sv1; - - sv1.applyOperations({"RX", "RY"}, {{0}, {1}}, {false, false}, - {{0.1}, {0.2}}); - - sv2.applyOperation("RX", {0}, false, {0.1}); - sv2.applyOperation("RY", {1}, false, {0.2}); - - REQUIRE(sv1.getDataVector() == approx(sv2.getDataVector())); - } -}