diff --git a/source/api_cc/src/DeepPotPT.cc b/source/api_cc/src/DeepPotPT.cc
index 84629042f4..4c7aac19b8 100644
--- a/source/api_cc/src/DeepPotPT.cc
+++ b/source/api_cc/src/DeepPotPT.cc
@@ -168,7 +168,7 @@ void DeepPotPT::compute(ENERGYVTYPE& ener,
     nlist_data.copy_from_nlist(lmp_list);
     nlist_data.shuffle_exclude_empty(fwd_map);
     nlist_data.padding();
-    if (do_message_passing == 1 && nghost > 0) {
+    if (do_message_passing == 1) {
       int nswap = lmp_list.nswap;
       torch::Tensor sendproc_tensor =
           torch::from_blob(lmp_list.sendproc, {nswap}, int32_option);
@@ -180,10 +180,14 @@ void DeepPotPT::compute(ENERGYVTYPE& ener,
           torch::from_blob(lmp_list.recvnum, {nswap}, int32_option);
       torch::Tensor sendnum_tensor =
           torch::from_blob(lmp_list.sendnum, {nswap}, int32_option);
-      torch::Tensor communicator_tensor = torch::from_blob(
-          const_cast<void*>(lmp_list.world), {1}, torch::kInt64);
-      // torch::Tensor communicator_tensor =
-      //     torch::tensor(lmp_list.world, int32_option);
+      torch::Tensor communicator_tensor;
+      if (lmp_list.world == 0) {
+        communicator_tensor = torch::empty({1}, torch::kInt64);
+      } else {
+        communicator_tensor = torch::from_blob(
+            const_cast<void*>(lmp_list.world), {1}, torch::kInt64);
+      }
+
       torch::Tensor nswap_tensor = torch::tensor(nswap, int32_option);
       int total_send =
           std::accumulate(lmp_list.sendnum, lmp_list.sendnum + nswap, 0);
@@ -196,12 +200,6 @@ void DeepPotPT::compute(ENERGYVTYPE& ener,
       comm_dict.insert("recv_num", recvnum_tensor);
      comm_dict.insert("communicator", communicator_tensor);
    }
-    if (do_message_passing == 1 && nghost == 0) {
-      // for the situation that no ghost atoms (e.g. serial nopbc)
-      // set the mapping arange(nloc) is enough
-      auto option = torch::TensorOptions().device(device).dtype(torch::kInt64);
-      mapping_tensor = at::arange(nloc_real, option).unsqueeze(0);
-    }
  }
  at::Tensor firstneigh = createNlistTensor(nlist_data.jlist);
  firstneigh_tensor = firstneigh.to(torch::kInt64).to(device);
@@ -224,7 +222,7 @@ void DeepPotPT::compute(ENERGYVTYPE& ener,
             .to(device);
  }
  c10::Dict<c10::IValue, c10::IValue> outputs =
-      (do_message_passing == 1 && nghost > 0)
+      (do_message_passing == 1)
          ? module
                .run_method("forward_lower", coord_wrapped_Tensor, atype_Tensor,
                            firstneigh_tensor, mapping_tensor, fparam_tensor,
diff --git a/source/lmp/tests/test_lammps_dpa_pt_nopbc.py b/source/lmp/tests/test_lammps_dpa_pt_nopbc.py
index 15fe2c0bc2..b0909bfc03 100644
--- a/source/lmp/tests/test_lammps_dpa_pt_nopbc.py
+++ b/source/lmp/tests/test_lammps_dpa_pt_nopbc.py
@@ -681,14 +681,6 @@ def test_pair_deepmd_si(lammps_si):
     [(["--balance"],), ([],)],
 )
 def test_pair_deepmd_mpi(balance_args: list):
-    if balance_args == []:
-        # python:5331 terminated with signal 11 at PC=7f3e940e3806 SP=7ffd5787edc0. Backtrace:
-        # /home/runner/work/deepmd-kit/deepmd-kit/dp_test/lib/libdeepmd_op_pt.so(+0x95806)[0x7f3e940e3806]
-        # /home/runner/work/deepmd-kit/deepmd-kit/dp_test/lib/libdeepmd_op_pt.so(+0x8f76e)[0x7f3e940dd76e]
-        # /home/runner/work/deepmd-kit/deepmd-kit/dp_test/lib/libdeepmd_op_pt.so(+0x9a38a)[0x7f3e940e838a]
-        # /home/runner/work/deepmd-kit/deepmd-kit/dp_test/lib/libdeepmd_op_pt.so(_Z9border_opRKN2at6TensorES2_S2_S2_S2_S2_S2_S2_S2_+0x8e)[0x7f3e940dda63]
-        # /home/runner/work/deepmd-kit/deepmd-kit/dp_test/lib/libdeepmd_op_pt.so(+0xaeac3)[0x7f3e940fcac3]
-        pytest.skip(reason="Known segfault, see comments for details")
     with tempfile.NamedTemporaryFile() as f:
         sp.check_call(
             [
diff --git a/source/op/pt/comm.cc b/source/op/pt/comm.cc
index a25dfbd542..71a2b0e118 100644
--- a/source/op/pt/comm.cc
+++ b/source/op/pt/comm.cc
@@ -87,16 +87,18 @@ class Border : public torch::autograd::Function<Border<FPTYPE>> {
     int mpi_init = 0;
     MPI_Initialized(&mpi_init);
     int cuda_aware = 1;
-    int me;
+    int me = 0;
     MPI_Comm world;
     int world_size = 0;
-    unpack_communicator(communicator_tensor, world);
-    MPI_Comm_rank(world, &me);
-    MPI_Comm_size(world, &world_size);
+    if (mpi_init) {
+      unpack_communicator(communicator_tensor, world);
+      MPI_Comm_rank(world, &me);
+      MPI_Comm_size(world, &world_size);
+    }
     MPI_Datatype mpi_type = get_mpi_type<FPTYPE>();
     MPI_Request request;
 #if defined(GOOGLE_CUDA) || defined(TENSORFLOW_USE_ROCM)
-    if (world_size != 1) {
+    if (world_size >= 1) {
       int version, subversion;
       MPI_Get_version(&version, &subversion);
       if (version >= 4) {
@@ -120,11 +122,15 @@ class Border : public torch::autograd::Function<Border<FPTYPE>> {
     for (int iswap = 0; iswap < nswap; ++iswap) {
       int nrecv = recvnum[iswap];
       int nsend = sendnum[iswap];
-      torch::Tensor isendlist =
-          torch::from_blob(sendlist[iswap], {nsend}, int32_options)
-              .to(recv_g1_tensor.device());
-      torch::Tensor send_g1_tensor = recv_g1_tensor.index_select(0, isendlist);
-      FPTYPE* send_g1 = send_g1_tensor.data_ptr<FPTYPE>();
+      torch::Tensor isendlist;
+      torch::Tensor send_g1_tensor;
+      FPTYPE* send_g1;
+      if (nsend != 0) {
+        isendlist = torch::from_blob(sendlist[iswap], {nsend}, int32_options)
+                        .to(recv_g1_tensor.device());
+        send_g1_tensor = recv_g1_tensor.index_select(0, isendlist);
+        send_g1 = send_g1_tensor.data_ptr<FPTYPE>();
+      }
 #ifdef USE_MPI
       if (sendproc[iswap] != me) {
         if (nrecv) {
@@ -207,15 +213,17 @@ class Border : public torch::autograd::Function<Border<FPTYPE>> {
     MPI_Initialized(&mpi_init);
     int world_size = 0;
     int cuda_aware = 1;
+    int me = 0;
     MPI_Comm world;
-    unpack_communicator(communicator_tensor, world);
-    int me;
-    MPI_Comm_rank(world, &me);
-    MPI_Comm_size(world, &world_size);
+    if (mpi_init) {
+      unpack_communicator(communicator_tensor, world);
+      MPI_Comm_rank(world, &me);
+      MPI_Comm_size(world, &world_size);
+    }
     MPI_Datatype mpi_type = get_mpi_type<FPTYPE>();
     MPI_Request request;
 #if defined(GOOGLE_CUDA) || defined(TENSORFLOW_USE_ROCM)
-    if (world_size != 1) {
+    if (world_size >= 1) {
       int version, subversion;
       MPI_Get_version(&version, &subversion);
       if (version >= 4) {
@@ -248,17 +256,20 @@ class Border : public torch::autograd::Function<Border<FPTYPE>> {
     int nlocal = nlocal_tensor.item<int>();
     int nghost = nghost_tensor.item<int>();
     int ntotal = nlocal + nghost;
-
-    torch::Tensor send_g1_tensor = d_local_g1_tensor;
-
-    int max_recvnum = sendnum_tensor.max().item<int>();
-    auto options = torch::TensorOptions()
-                       .dtype(d_local_g1_tensor.dtype())
-                       .device(d_local_g1_tensor.device());
-    torch::Tensor recv_g1_tensor =
-        torch::empty({max_recvnum, tensor_size}, options);
-    FPTYPE* recv_g1 = recv_g1_tensor.data_ptr<FPTYPE>();
-    FPTYPE* send_g1 = send_g1_tensor.data_ptr<FPTYPE>() + ntotal * tensor_size;
+    torch::Tensor send_g1_tensor;
+    torch::Tensor recv_g1_tensor;
+    FPTYPE* recv_g1;
+    FPTYPE* send_g1;
+    if (nswap != 0) {
+      send_g1_tensor = d_local_g1_tensor;
+      int max_recvnum = sendnum_tensor.max().item<int>();
+      auto options = torch::TensorOptions()
+                         .dtype(d_local_g1_tensor.dtype())
+                         .device(d_local_g1_tensor.device());
+      recv_g1_tensor = torch::empty({max_recvnum, tensor_size}, options);
+      recv_g1 = recv_g1_tensor.data_ptr<FPTYPE>();
+      send_g1 = send_g1_tensor.data_ptr<FPTYPE>() + ntotal * tensor_size;
+    }
     int end = ntotal;
     auto int32_options = torch::TensorOptions().dtype(torch::kInt32);