Skip to content

Commit d3af819

Browse files
authored
Merge branch 'add_LGPUMPI' into add_py_LGPUMPI
2 parents 6ad1c7c + 84e2eb6 commit d3af819

15 files changed

+103
-22
lines changed

pennylane_lightning/core/src/algorithms/tests/mpi/Test_AdjointJacobianMPI.cpp

+6
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,7 @@ template <typename TypeList> void testAdjointJacobian() {
5858
using ComplexT = typename StateVectorT::ComplexT;
5959

6060
MPIManager mpi_manager(MPI_COMM_WORLD);
61+
CHECK(mpi_manager.getSize() == 2);
6162

6263
const std::vector<PrecisionT> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
6364

@@ -81,6 +82,7 @@ template <typename TypeList> void testAdjointJacobian() {
8182

8283
int nDevices = 0; // Number of GPU devices per node
8384
cudaGetDeviceCount(&nDevices);
85+
CHECK(nDevices >= 2);
8486
int deviceId = mpi_manager.getRank() % nDevices;
8587
cudaSetDevice(deviceId);
8688
DevTag<int> dt_local(deviceId, 0);
@@ -131,6 +133,7 @@ template <typename TypeList> void testAdjointJacobian() {
131133

132134
int nDevices = 0; // Number of GPU devices per node
133135
cudaGetDeviceCount(&nDevices);
136+
CHECK(nDevices >= 2);
134137
int deviceId = mpi_manager.getRank() % nDevices;
135138
cudaSetDevice(deviceId);
136139
DevTag<int> dt_local(deviceId, 0);
@@ -181,6 +184,7 @@ template <typename TypeList> void testAdjointJacobian() {
181184

182185
int nDevices = 0; // Number of GPU devices per node
183186
cudaGetDeviceCount(&nDevices);
187+
CHECK(nDevices >= 2);
184188
int deviceId = mpi_manager.getRank() % nDevices;
185189
cudaSetDevice(deviceId);
186190
DevTag<int> dt_local(deviceId, 0);
@@ -293,6 +297,7 @@ template <typename TypeList> void testAdjointJacobian() {
293297

294298
int nDevices = 0; // Number of GPU devices per node
295299
cudaGetDeviceCount(&nDevices);
300+
CHECK(nDevices >= 2);
296301
int deviceId = mpi_manager.getRank() % nDevices;
297302
cudaSetDevice(deviceId);
298303
DevTag<int> dt_local(deviceId, 0);
@@ -344,6 +349,7 @@ template <typename TypeList> void testAdjointJacobian() {
344349

345350
int nDevices = 0; // Number of GPU devices per node
346351
cudaGetDeviceCount(&nDevices);
352+
CHECK(nDevices >= 2);
347353
int deviceId = mpi_manager.getRank() % nDevices;
348354
cudaSetDevice(deviceId);
349355
DevTag<int> dt_local(deviceId, 0);

pennylane_lightning/core/src/measurements/tests/mpi/Test_MeasurementsBaseMPI.cpp

+12
Original file line number | Diff line number | Diff line change
@@ -68,6 +68,7 @@ template <typename TypeList> void testProbabilities() {
6868
size_t num_qubits = 3;
6969

7070
MPIManager mpi_manager(MPI_COMM_WORLD);
71+
CHECK(mpi_manager.getSize() == 2);
7172

7273
size_t mpi_buffersize = 1;
7374

@@ -77,6 +78,7 @@ template <typename TypeList> void testProbabilities() {
7778

7879
int nDevices = 0;
7980
cudaGetDeviceCount(&nDevices);
81+
CHECK(nDevices >= 2);
8082
int deviceId = mpi_manager.getRank() % nDevices;
8183
cudaSetDevice(deviceId);
8284
DevTag<int> dt_local(deviceId, 0);
@@ -127,6 +129,7 @@ template <typename TypeList> void testNamedObsExpval() {
127129
size_t num_qubits = 3;
128130

129131
MPIManager mpi_manager(MPI_COMM_WORLD);
132+
CHECK(mpi_manager.getSize() == 2);
130133

131134
size_t mpi_buffersize = 1;
132135

@@ -136,6 +139,7 @@ template <typename TypeList> void testNamedObsExpval() {
136139

137140
int nDevices = 0;
138141
cudaGetDeviceCount(&nDevices);
142+
CHECK(nDevices >= 2);
139143
int deviceId = mpi_manager.getRank() % nDevices;
140144
cudaSetDevice(deviceId);
141145
DevTag<int> dt_local(deviceId, 0);
@@ -197,6 +201,7 @@ template <typename TypeList> void testHermitianObsExpval() {
197201
size_t num_qubits = 3;
198202

199203
MPIManager mpi_manager(MPI_COMM_WORLD);
204+
CHECK(mpi_manager.getSize() == 2);
200205

201206
size_t mpi_buffersize = 1;
202207

@@ -206,6 +211,7 @@ template <typename TypeList> void testHermitianObsExpval() {
206211

207212
int nDevices = 0;
208213
cudaGetDeviceCount(&nDevices);
214+
CHECK(nDevices >= 2);
209215
int deviceId = mpi_manager.getRank() % nDevices;
210216
cudaSetDevice(deviceId);
211217
DevTag<int> dt_local(deviceId, 0);
@@ -294,6 +300,7 @@ template <typename TypeList> void testNamedObsVar() {
294300
size_t num_qubits = 3;
295301

296302
MPIManager mpi_manager(MPI_COMM_WORLD);
303+
CHECK(mpi_manager.getSize() == 2);
297304

298305
size_t mpi_buffersize = 1;
299306

@@ -303,6 +310,7 @@ template <typename TypeList> void testNamedObsVar() {
303310

304311
int nDevices = 0;
305312
cudaGetDeviceCount(&nDevices);
313+
CHECK(nDevices >= 2);
306314
int deviceId = mpi_manager.getRank() % nDevices;
307315
cudaSetDevice(deviceId);
308316
DevTag<int> dt_local(deviceId, 0);
@@ -365,6 +373,7 @@ template <typename TypeList> void testHermitianObsVar() {
365373
size_t num_qubits = 3;
366374

367375
MPIManager mpi_manager(MPI_COMM_WORLD);
376+
CHECK(mpi_manager.getSize() == 2);
368377

369378
size_t mpi_buffersize = 1;
370379

@@ -374,6 +383,7 @@ template <typename TypeList> void testHermitianObsVar() {
374383

375384
int nDevices = 0;
376385
cudaGetDeviceCount(&nDevices);
386+
CHECK(nDevices >= 2);
377387
int deviceId = mpi_manager.getRank() % nDevices;
378388
cudaSetDevice(deviceId);
379389
DevTag<int> dt_local(deviceId, 0);
@@ -470,6 +480,7 @@ template <typename TypeList> void testSamples() {
470480
size_t num_qubits = 3;
471481

472482
MPIManager mpi_manager(MPI_COMM_WORLD);
483+
CHECK(mpi_manager.getSize() == 2);
473484

474485
size_t mpi_buffersize = 1;
475486

@@ -479,6 +490,7 @@ template <typename TypeList> void testSamples() {
479490

480491
int nDevices = 0;
481492
cudaGetDeviceCount(&nDevices);
493+
CHECK(nDevices >= 2);
482494
int deviceId = mpi_manager.getRank() % nDevices;
483495
cudaSetDevice(deviceId);
484496
DevTag<int> dt_local(deviceId, 0);

pennylane_lightning/core/src/observables/tests/mpi/Test_ObservablesMPI.cpp

-14
Original file line number | Diff line number | Diff line change
@@ -422,20 +422,6 @@ template <typename TypeList> void testHamiltonianBase() {
422422

423423
REQUIRE(ham1->getWires() == std::vector<size_t>{0, 5, 9});
424424
}
425-
426-
/*
427-
DYNAMIC_SECTION("applyInPlace must fail - "
428-
<< StateVectorMPIToName<StateVectorT>::name) {
429-
auto ham =
430-
HamiltonianT::create({PrecisionT{1.0}, h, h}, {zz, x1, x2});
431-
auto st_data = createZeroState<ComplexT>(2);
432-
433-
StateVectorT state_vector(st_data.data(), st_data.size());
434-
435-
REQUIRE_THROWS_AS(ham->applyInPlace(state_vector),
436-
LightningException);
437-
}
438-
*/
439425
}
440426
testHamiltonianBase<typename TypeList::Next>();
441427
}

pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaMPI.hpp

+1
Original file line number | Diff line number | Diff line change
@@ -1867,6 +1867,7 @@ class StateVectorCudaMPI
18671867
// LCOV_EXCL_STOP
18681868

18691869
cuDoubleComplex expect_;
1870+
18701871
// compute expectation
18711872
PL_CUSTATEVEC_IS_SUCCESS(custatevecComputeExpectation(
18721873
/* custatevecHandle_t */ handle_.get(),

pennylane_lightning/core/src/simulators/lightning_gpu/algorithms/tests/mpi/Test_AdjointJacobianGPUMPI.cpp

+16
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,7 @@ TEST_CASE("AdjointJacobianGPUMPI::adjointJacobianMPI Op=RX, Obs=[Z,Z]",
4747
using StateVectorT = StateVectorCudaMPI<double>;
4848

4949
MPIManager mpi_manager(MPI_COMM_WORLD);
50+
CHECK(mpi_manager.getSize() == 2);
5051

5152
AdjointJacobianMPI<StateVectorT> adj;
5253
std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
@@ -66,6 +67,7 @@ TEST_CASE("AdjointJacobianGPUMPI::adjointJacobianMPI Op=RX, Obs=[Z,Z]",
6667

6768
int nDevices = 0; // Number of GPU devices per node
6869
cudaGetDeviceCount(&nDevices);
70+
CHECK(nDevices >= 2);
6971
int deviceId = mpi_manager.getRank() % nDevices;
7072
cudaSetDevice(deviceId);
7173
DevTag<int> dt_local(deviceId, 0);
@@ -104,6 +106,7 @@ TEST_CASE("AdjointJacobianGPUMPI::adjointJacobianMPI Op=[QubitStateVector, "
104106
using StateVectorT = StateVectorCudaMPI<double>;
105107

106108
MPIManager mpi_manager(MPI_COMM_WORLD);
109+
CHECK(mpi_manager.getSize() == 2);
107110

108111
AdjointJacobianMPI<StateVectorT> adj;
109112
std::vector<double> param{-M_PI / 7, M_PI / 5, 2 * M_PI / 3};
@@ -124,6 +127,7 @@ TEST_CASE("AdjointJacobianGPUMPI::adjointJacobianMPI Op=[QubitStateVector, "
124127

125128
int nDevices = 0; // Number of GPU devices per node
126129
cudaGetDeviceCount(&nDevices);
130+
CHECK(nDevices >= 2);
127131
int deviceId = mpi_manager.getRank() % nDevices;
128132
cudaSetDevice(deviceId);
129133
DevTag<int> dt_local(deviceId, 0);
@@ -167,6 +171,7 @@ TEST_CASE(
167171
std::vector<double> jacobian_serial(num_obs * tp.size(), 0);
168172

169173
MPIManager mpi_manager(MPI_COMM_WORLD);
174+
CHECK(mpi_manager.getSize() == 2);
170175

171176
size_t mpi_buffersize = 1;
172177

@@ -177,6 +182,7 @@ TEST_CASE(
177182

178183
int nDevices = 0; // Number of GPU devices per node
179184
cudaGetDeviceCount(&nDevices);
185+
CHECK(nDevices >= 2);
180186
int deviceId = mpi_manager.getRank() % nDevices;
181187
cudaSetDevice(deviceId);
182188
DevTag<int> dt_local(deviceId, 0);
@@ -234,6 +240,7 @@ TEST_CASE(
234240
std::vector<double> jacobian_serial(num_obs * tp.size(), 0);
235241

236242
MPIManager mpi_manager(MPI_COMM_WORLD);
243+
CHECK(mpi_manager.getSize() == 2);
237244

238245
size_t mpi_buffersize = 1;
239246

@@ -244,6 +251,7 @@ TEST_CASE(
244251

245252
int nDevices = 0; // Number of GPU devices per node
246253
cudaGetDeviceCount(&nDevices);
254+
CHECK(nDevices >= 2);
247255
int deviceId = mpi_manager.getRank() % nDevices;
248256
cudaSetDevice(deviceId);
249257
DevTag<int> dt_local(deviceId, 0);
@@ -297,6 +305,7 @@ TEST_CASE("AdjointJacobianGPUMPI::adjointJacobian Op=[RX,RX,RX], Obs=[ZZZ]",
297305
std::vector<double> jacobian_serial(num_obs * tp.size(), 0);
298306

299307
MPIManager mpi_manager(MPI_COMM_WORLD);
308+
CHECK(mpi_manager.getSize() == 2);
300309

301310
size_t mpi_buffersize = 1;
302311

@@ -307,6 +316,7 @@ TEST_CASE("AdjointJacobianGPUMPI::adjointJacobian Op=[RX,RX,RX], Obs=[ZZZ]",
307316

308317
int nDevices = 0; // Number of GPU devices per node
309318
cudaGetDeviceCount(&nDevices);
319+
CHECK(nDevices >= 2);
310320
int deviceId = mpi_manager.getRank() % nDevices;
311321
cudaSetDevice(deviceId);
312322
DevTag<int> dt_local(deviceId, 0);
@@ -358,6 +368,7 @@ TEST_CASE("AdjointJacobianGPUMPI::adjointJacobian Op=Mixed, Obs=[XXX]",
358368
std::vector<double> jacobian_serial(num_obs * tp.size(), 0);
359369

360370
MPIManager mpi_manager(MPI_COMM_WORLD);
371+
CHECK(mpi_manager.getSize() == 2);
361372

362373
size_t mpi_buffersize = 1;
363374

@@ -368,6 +379,7 @@ TEST_CASE("AdjointJacobianGPUMPI::adjointJacobian Op=Mixed, Obs=[XXX]",
368379

369380
int nDevices = 0; // Number of GPU devices per node
370381
cudaGetDeviceCount(&nDevices);
382+
CHECK(nDevices >= 2);
371383
int deviceId = mpi_manager.getRank() % nDevices;
372384
cudaSetDevice(deviceId);
373385
DevTag<int> dt_local(deviceId, 0);
@@ -436,6 +448,7 @@ TEST_CASE("AdjointJacobianGPU::AdjointJacobianGPUMPI Op=[RX,RX,RX], "
436448
std::vector<double> jacobian_serial(num_obs * tp.size(), 0);
437449

438450
MPIManager mpi_manager(MPI_COMM_WORLD);
451+
CHECK(mpi_manager.getSize() == 2);
439452

440453
size_t mpi_buffersize = 1;
441454

@@ -446,6 +459,7 @@ TEST_CASE("AdjointJacobianGPU::AdjointJacobianGPUMPI Op=[RX,RX,RX], "
446459

447460
int nDevices = 0; // Number of GPU devices per node
448461
cudaGetDeviceCount(&nDevices);
462+
CHECK(nDevices >= 2);
449463
int deviceId = mpi_manager.getRank() % nDevices;
450464
cudaSetDevice(deviceId);
451465
DevTag<int> dt_local(deviceId, 0);
@@ -504,6 +518,7 @@ TEST_CASE("AdjointJacobianGPU::AdjointJacobianGPU Test HermitianObs",
504518
std::vector<double> jacobian2_serial(num_obs * tp.size(), 0);
505519

506520
MPIManager mpi_manager(MPI_COMM_WORLD);
521+
CHECK(mpi_manager.getSize() == 2);
507522

508523
size_t mpi_buffersize = 1;
509524

@@ -514,6 +529,7 @@ TEST_CASE("AdjointJacobianGPU::AdjointJacobianGPU Test HermitianObs",
514529

515530
int nDevices = 0; // Number of GPU devices per node
516531
cudaGetDeviceCount(&nDevices);
532+
CHECK(nDevices >= 2);
517533
int deviceId = mpi_manager.getRank() % nDevices;
518534
cudaSetDevice(deviceId);
519535
DevTag<int> dt_local(deviceId, 0);

pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/mpi/Test_StateVectorCudaMPI_Generators.cpp

+2
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,7 @@ using namespace Pennylane::LightningGPU::MPI;
6464
using cp_t = std::complex<TestType>; \
6565
using PrecisionT = TestType; \
6666
MPIManager mpi_manager(MPI_COMM_WORLD); \
67+
CHECK(mpi_manager.getSize() == 2); \
6768
size_t mpi_buffersize = 1; \
6869
size_t nGlobalIndexBits = \
6970
std::bit_width(static_cast<size_t>(mpi_manager.getSize())) - 1; \
@@ -82,6 +83,7 @@ using namespace Pennylane::LightningGPU::MPI;
8283
mpi_manager.Barrier(); \
8384
int nDevices = 0; \
8485
cudaGetDeviceCount(&nDevices); \
86+
CHECK(nDevices >= 2); \
8587
int deviceId = mpi_manager.getRank() % nDevices; \
8688
cudaSetDevice(deviceId); \
8789
DevTag<int> dt_local(deviceId, 0); \

pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/mpi/Test_StateVectorCudaMPI_NonParam.cpp

+6
Original file line number | Diff line number | Diff line change
@@ -90,6 +90,7 @@ TEMPLATE_TEST_CASE("StateVectorCudaMPI::SetStateVector",
9090
using PrecisionT = TestType;
9191
using cp_t = std::complex<PrecisionT>;
9292
MPIManager mpi_manager(MPI_COMM_WORLD);
93+
CHECK(mpi_manager.getSize() == 2);
9394

9495
size_t mpi_buffersize = 1;
9596

@@ -132,6 +133,7 @@ TEMPLATE_TEST_CASE("StateVectorCudaMPI::SetStateVector",
132133

133134
int nDevices = 0; // Number of GPU devices per node
134135
cudaGetDeviceCount(&nDevices);
136+
CHECK(nDevices >= 2);
135137
int deviceId = mpi_manager.getRank() % nDevices;
136138
cudaSetDevice(deviceId);
137139
DevTag<int> dt_local(deviceId, 0);
@@ -165,6 +167,7 @@ TEMPLATE_TEST_CASE("StateVectorCudaMPI::SetIthStates",
165167
using PrecisionT = TestType;
166168
using cp_t = std::complex<PrecisionT>;
167169
MPIManager mpi_manager(MPI_COMM_WORLD);
170+
CHECK(mpi_manager.getSize() == 2);
168171

169172
size_t mpi_buffersize = 1;
170173

@@ -193,6 +196,7 @@ TEMPLATE_TEST_CASE("StateVectorCudaMPI::SetIthStates",
193196

194197
int nDevices = 0; // Number of GPU devices per node
195198
cudaGetDeviceCount(&nDevices);
199+
CHECK(nDevices >= 2);
196200
int deviceId = mpi_manager.getRank() % nDevices;
197201
cudaSetDevice(deviceId);
198202
DevTag<int> dt_local(deviceId, 0);
@@ -219,6 +223,7 @@ TEMPLATE_TEST_CASE("StateVectorCudaMPI::SetIthStates",
219223
using cp_t = std::complex<TestType>; \
220224
using PrecisionT = TestType; \
221225
MPIManager mpi_manager(MPI_COMM_WORLD); \
226+
CHECK(mpi_manager.getSize() == 2); \
222227
size_t mpi_buffersize = 1; \
223228
size_t nGlobalIndexBits = \
224229
std::bit_width(static_cast<size_t>(mpi_manager.getSize())) - 1; \
@@ -237,6 +242,7 @@ TEMPLATE_TEST_CASE("StateVectorCudaMPI::SetIthStates",
237242
mpi_manager.Barrier(); \
238243
int nDevices = 0; \
239244
cudaGetDeviceCount(&nDevices); \
245+
CHECK(nDevices >= 2); \
240246
int deviceId = mpi_manager.getRank() % nDevices; \
241247
cudaSetDevice(deviceId); \
242248
DevTag<int> dt_local(deviceId, 0); \

pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/mpi/Test_StateVectorCudaMPI_Param.cpp

+2
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,7 @@ using namespace Pennylane::LightningGPU::MPI;
6363
using cp_t = std::complex<TestType>; \
6464
using PrecisionT = TestType; \
6565
MPIManager mpi_manager(MPI_COMM_WORLD); \
66+
CHECK(mpi_manager.getSize() == 2); \
6667
size_t mpi_buffersize = 1; \
6768
size_t nGlobalIndexBits = \
6869
std::bit_width(static_cast<size_t>(mpi_manager.getSize())) - 1; \
@@ -81,6 +82,7 @@ using namespace Pennylane::LightningGPU::MPI;
8182
mpi_manager.Barrier(); \
8283
int nDevices = 0; \
8384
cudaGetDeviceCount(&nDevices); \
85+
CHECK(nDevices >= 2); \
8486
int deviceId = mpi_manager.getRank() % nDevices; \
8587
cudaSetDevice(deviceId); \
8688
DevTag<int> dt_local(deviceId, 0); \

0 commit comments

Comments
 (0)