From 91f095b007c7b9ac37b185f1b376da14ec0e84b0 Mon Sep 17 00:00:00 2001 From: Carlo Camilloni Date: Tue, 19 Mar 2024 21:27:28 +0100 Subject: [PATCH] new cmdata: removed equivalent atoms should be faster --- tools/cmdata/main.cpp | 17 +--- tools/cmdata/src/cmdata/cmdata.hpp | 144 ++++++++-------------------- tools/cmdata/src/cmdata/density.hpp | 6 +- tools/cmdata/src/cmdata/io.hpp | 74 -------------- 4 files changed, 45 insertions(+), 196 deletions(-) diff --git a/tools/cmdata/main.cpp b/tools/cmdata/main.cpp index 2a13dc11..a6d2456f 100644 --- a/tools/cmdata/main.cpp +++ b/tools/cmdata/main.cpp @@ -18,12 +18,12 @@ int main(int argc, const char** argv) double cutoff = 0.75, mol_cutoff = 6.0; int nskip = 0, num_threads = 1, dt = 0; float t_begin = 0.0, t_end = -1.0; - char *p_traj_path = NULL, *p_top_path = NULL, *p_mode = NULL, *p_weights_path = NULL, *p_sym_file_path = NULL; + char *p_traj_path = NULL, *p_top_path = NULL, *p_mode = NULL, *p_weights_path = NULL; char *p_out_prefix = NULL; - std::string traj_path, top_path, mode, weights_path, sym_file_path; + std::string traj_path, top_path, mode, weights_path; std::string out_prefix; int *p_nopbc = NULL; - bool list_sym = false, write_sym = false, nopbc = false; + bool nopbc = false; // make popt options struct poptOption optionsTable[] = { @@ -39,9 +39,6 @@ int main(int argc, const char** argv) {"num_threads", '\0', POPT_ARG_INT | POPT_ARGFLAG_OPTIONAL, &num_threads, 0, "Number of threads", "INT"}, {"mode", '\0', POPT_ARG_STRING | POPT_ARGFLAG_OPTIONAL, &p_mode, 0, "Mode of operation", "STRING"}, {"weights", '\0', POPT_ARG_STRING | POPT_ARGFLAG_OPTIONAL, &p_weights_path, 0, "Weights file", "FILE"}, - {"sym", '\0', POPT_ARG_STRING | POPT_ARGFLAG_OPTIONAL, &p_sym_file_path, 0, "Symmetry file", "FILE"}, - {"list_sym", '\0', POPT_ARG_NONE | POPT_ARGFLAG_OPTIONAL, &list_sym, 0, "List symmetries", 0}, - {"write_sym", '\0', POPT_ARG_NONE | POPT_ARGFLAG_OPTIONAL, &write_sym, 0, "Write symmetries", 0}, {"no_pbc", '\0', POPT_ARG_NONE | POPT_ARGFLAG_OPTIONAL, &p_nopbc, 0, "Ignore pbcs", 0}, POPT_TABLEEND }; @@ -62,7 +59,6 @@ int main(int argc, const char** argv) top_path = std::string(p_top_path); mode = p_mode ? std::string(p_mode) : std::string("intra+same+cross"); if ( p_weights_path != NULL ) weights_path = std::string(p_weights_path); - if ( p_sym_file_path != NULL ) sym_file_path = std::string(p_sym_file_path); if ( p_out_prefix != NULL ) out_prefix = std::string(p_out_prefix); if ( p_nopbc != NULL ) nopbc = true; @@ -82,11 +78,6 @@ int main(int argc, const char** argv) std::cerr << "Weights file does not exist!" << std::endl; return 3; } - if ( !sym_file_path.empty() && !std::filesystem::exists(std::filesystem::path(sym_file_path)) ) - { - std::cerr << "Symmetry file does not exist!" << std::endl; - return 4; - } if ( !out_prefix.empty() ) { bool created = std::filesystem::create_directories(std::filesystem::path(out_prefix)); @@ -146,7 +137,7 @@ int main(int argc, const char** argv) cmdata::CMData cmdata( top_path, traj_path, cutoff, mol_cutoff, nskip, num_threads, dt, - mode, weights_path, sym_file_path, list_sym, nopbc, t_begin, t_end + mode, weights_path, nopbc, t_begin, t_end ); cmdata.run(); cmdata.process_data(); diff --git a/tools/cmdata/src/cmdata/cmdata.hpp b/tools/cmdata/src/cmdata/cmdata.hpp index a0a4740f..53752992 100644 --- a/tools/cmdata/src/cmdata/cmdata.hpp +++ b/tools/cmdata/src/cmdata/cmdata.hpp @@ -58,11 +58,6 @@ class CMData std::vector num_mol_unique_; std::vector inv_num_mol_unique_; std::vector inv_num_mol_; - - // symmetry fields - bool list_sym_; - std::string sym_file_path_; - std::vector>> equivalence_list_; // weights fields double weights_sum_; @@ -119,7 +114,7 @@ class CMData const int i, const int nindex_, t_pbc *pbc, rvec *x, const std::vector &inv_num_mol_, const double cut_sig_2_, const std::vector &natmol2_, const std::vector &num_mol_unique_, const std::vector &mol_id_, const std::vector> &cross_index_, const std::vector &density_bins_, const double mcut2_, - rvec *xcm_, const gmx::RangePartitioning &mols_, gmx_mtop_t *mtop_, const std::vector>> &equivalence_list_, + rvec *xcm_, const gmx::RangePartitioning &mols_, gmx_mtop_t *mtop_, std::vector> &frame_same_mat_, std::vector> &frame_same_mutex_, cmdata_matrix &intram_mat_density_, cmdata_matrix &interm_same_mat_density_, std::vector> &frame_cross_mat_, std::vector> &frame_cross_mutex_, cmdata_matrix &interm_cross_mat_density_, cmdata::parallel::Semaphore &semaphore_, @@ -178,85 +173,48 @@ class CMData a_j++; continue; } - // check for chemical equivalence - double nsym = static_cast(equivalence_list_[mol_id_[i]][a_i].size()*equivalence_list_[mol_id_[j]][a_j].size()); - if (i==j&&a_i!=a_j) + std::size_t delta = a_i - a_j; + rvec sym_dx; + if (pbc != nullptr) pbc_dx(pbc, x[ii], x[jj], sym_dx); + else rvec_sub(x[ii], x[jj], sym_dx); + double dx2 = iprod(sym_dx, sym_dx); + if (i==j) { - // this is to account for the correct normalisation in the case in which - // an intramolecular interaction is between two atoms that are also equivalent - for (std::size_t eq_i = 0; eq_i < equivalence_list_[mol_id_[i]][a_i].size(); eq_i++) - { - for (std::size_t eq_j = 0; eq_j < equivalence_list_[mol_id_[j]][a_j].size(); eq_j++) - { - // get molecule-wise atom index considering equivalence - std::size_t eqa_i = equivalence_list_[mol_id_[i]][a_i][eq_i]; // molecule-wise equivalence index i - std::size_t geqa_i = ii + (eqa_i - equivalence_list_[mol_id_[i]][a_i][0]); // global equivalence index i - std::size_t eqa_j = equivalence_list_[mol_id_[j]][a_j][eq_j]; // molecule-wise equivalence index j - std::size_t geqa_j = jj + (eqa_j - equivalence_list_[mol_id_[j]][a_j][0]); // global equivalence index j - if (geqa_i==geqa_j) nsym=nsym-1.0; - } + if (dx2 < cut_sig_2_) + { // intra molecule species + f_intra_mol_(i, a_i, a_j, dx2, weight, mol_id_, natmol2_, density_bins_, inv_num_mol_, frame_same_mutex_, intram_mat_density_); } } - for (std::size_t eq_i = 0; eq_i < equivalence_list_[mol_id_[i]][a_i].size(); eq_i++) + else { - for (std::size_t eq_j = 0; eq_j < equivalence_list_[mol_id_[j]][a_j].size(); eq_j++) - { - // get molecule-wise atom index considering equivalence - std::size_t eqa_i = equivalence_list_[mol_id_[i]][a_i][eq_i]; // molecule-wise equivalence index i - std::size_t geqa_i = ii + (eqa_i - equivalence_list_[mol_id_[i]][a_i][0]); // global equivalence index i - std::size_t eqa_j = equivalence_list_[mol_id_[j]][a_j][eq_j]; // molecule-wise equivalence index j - std::size_t geqa_j = jj + (eqa_j - equivalence_list_[mol_id_[j]][a_j][0]); // global equivalence index j - std::size_t delta = eqa_i - eqa_j; - if (i==j&&a_i==a_j) { - // this is the special case of intra-self that should not be symmetrized - // the distance of an atom with itself cannot be greater than 0. - geqa_i=ii; - geqa_j=jj; - } - if (i==j&&a_i!=a_j&&geqa_i==geqa_j) continue; - rvec sym_dx; - if (pbc != nullptr) pbc_dx(pbc, x[geqa_i], x[geqa_j], sym_dx); - else rvec_sub(x[geqa_i], x[geqa_j], sym_dx); - double dx2 = iprod(sym_dx, sym_dx); - if (i==j) + if (mol_id_[i]==mol_id_[j]) + { // inter same molecule specie + if (dx2 < cut_sig_2_) { + f_inter_mol_same_( + i, mol_i, a_i, a_j, dx2, weight, mol_id_, natmol2_, density_bins_, frame_same_mutex_, frame_same_mat_, interm_same_mat_density_ + ); + } + if (delta!=0.) { + // this is to account for inversion atom/molecule + if (pbc != nullptr) pbc_dx(pbc, x[ii-delta], x[jj+delta], sym_dx); + else rvec_sub(x[ii-delta], x[jj+delta], sym_dx); + dx2 = iprod(sym_dx, sym_dx); if (dx2 < cut_sig_2_) - { // intra molecule species - f_intra_mol_(i, a_i, a_j, dx2, weight, nsym, mol_id_, natmol2_, density_bins_, inv_num_mol_, frame_same_mutex_, intram_mat_density_); + { + f_inter_mol_same_( + i, mol_i, a_i, a_j, dx2, weight, mol_id_, natmol2_, density_bins_, frame_same_mutex_, frame_same_mat_, interm_same_mat_density_ + ); } } - else + } + else + { // inter cross molecule species + if (dx2 < cut_sig_2_) { - if (mol_id_[i]==mol_id_[j]) - { // inter same molecule specie - if (dx2 < cut_sig_2_) - { - f_inter_mol_same_( - i, mol_i, a_i, a_j, dx2, weight, mol_id_, natmol2_, density_bins_, frame_same_mutex_, frame_same_mat_, interm_same_mat_density_ - ); - } - if (delta!=0.) { - // this is to account for inversion atom/molecule - if (pbc != nullptr) pbc_dx(pbc, x[geqa_i-delta], x[geqa_j+delta], sym_dx); - else rvec_sub(x[geqa_i-delta], x[geqa_j+delta], sym_dx); - dx2 = iprod(sym_dx, sym_dx); - if (dx2 < cut_sig_2_) - { - f_inter_mol_same_( - i, mol_i, a_i, a_j, dx2, weight, mol_id_, natmol2_, density_bins_, frame_same_mutex_, frame_same_mat_, interm_same_mat_density_ - ); - } - } - } - else - { // inter cross molecule species - if (dx2 < cut_sig_2_) - { - f_inter_mol_cross_( - i, j, mol_i, mol_j, a_i, a_j, dx2, weight, mol_id_, natmol2_, cross_index_, density_bins_, frame_cross_mutex_, frame_cross_mat_, interm_cross_mat_density_ - ); - } - } + f_inter_mol_cross_( + i, j, mol_i, mol_j, a_i, a_j, dx2, weight, mol_id_, natmol2_, cross_index_, density_bins_, frame_cross_mutex_, frame_cross_mat_, interm_cross_mat_density_ + ); } } } @@ -274,9 +232,9 @@ class CMData const std::string &top_path, const std::string &traj_path, double cutoff, double mol_cutoff, int nskip, int num_threads, int dt, const std::string &mode, const std::string &weights_path, - const std::string &sym_file_path, bool list_sym, bool no_pbc, float t_begin, float t_end + bool no_pbc, float t_begin, float t_end ) : cutoff_(cutoff), mol_cutoff_(mol_cutoff), nskip_(nskip), num_threads_(num_threads), - mode_(mode), weights_path_(weights_path), sym_file_path_(sym_file_path), list_sym_(list_sym), + mode_(mode), weights_path_(weights_path), no_pbc_(no_pbc), dt_(dt), t_begin_(t_begin), t_end_(t_end) { bool bTop_; @@ -462,32 +420,6 @@ class CMData } } - if (sym_file_path_=="") std::cout << "No symmetry file provided. Running with standard settings.\n"; - else std::cout << "Running with symmetry file " << sym_file_path_ << "\nReading file...\n"; - cmdata::io::read_symmetry_indices(sym_file_path_, mtop_, equivalence_list_, natmol2_, start_index); - - if (list_sym_) - { - std::cout << "Writing out symmetry listing into sym_list.txt" << std::endl; - std::fstream sym_list_file("sym_list.txt", std::fstream::out); - for (int i = 0; i < equivalence_list_.size(); i++) - { - sym_list_file << "[ molecule_" << i << " ]\n"; - for ( int j = 0; j < equivalence_list_[i].size(); j++ ) - { - sym_list_file << "atom " << j << ":"; - for ( int k = 0; k < equivalence_list_[i][j].size(); k++ ) - { - sym_list_file << " " << equivalence_list_[i][j][k]; - } - sym_list_file << "\n"; - } - sym_list_file << "\n"; - } - sym_list_file << "\n"; - sym_list_file.close(); - } - n_bins_ = cmdata::indexing::n_bins(cutoff_); dx_ = cutoff_ / static_cast(n_bins_); @@ -585,7 +517,7 @@ class CMData /* start molecule thread*/ mol_threads_[i] = std::thread(molecule_routine, i, nindex_, pbc_, frame_->x, std::cref(inv_num_mol_), cut_sig_2_, std::cref(natmol2_), std::cref(num_mol_unique_), std::cref(mol_id_), std::cref(cross_index_), - std::cref(density_bins_), mcut2_, xcm_, mols_, mtop_, std::cref(equivalence_list_), std::ref(frame_same_mat_), + std::cref(density_bins_), mcut2_, xcm_, mols_, mtop_, std::ref(frame_same_mat_), std::ref(frame_same_mutex_), std::ref(intram_mat_density_), std::ref(interm_same_mat_density_), std::ref(frame_cross_mat_), std::ref(frame_cross_mutex_), std::ref(interm_cross_mat_density_), std::ref(semaphore_), std::cref(f_intra_mol_), std::cref(f_inter_mol_same_), std::cref(f_inter_mol_cross_), weight); @@ -832,4 +764,4 @@ class CMData } // namespace cmdata -#endif // _CM_DATA_HPP \ No newline at end of file +#endif // _CM_DATA_HPP diff --git a/tools/cmdata/src/cmdata/density.hpp b/tools/cmdata/src/cmdata/density.hpp index 9dc8976f..a9c94d9e 100644 --- a/tools/cmdata/src/cmdata/density.hpp +++ b/tools/cmdata/src/cmdata/density.hpp @@ -33,7 +33,7 @@ void kernel_density_estimator(std::vector::iterator x, const std::vector } void intra_mol_routine( - int i, std::size_t a_i, std::size_t a_j, double dx2, double weight, int nsym, const std::vector &mol_id_, + int i, std::size_t a_i, std::size_t a_j, double dx2, double weight, const std::vector &mol_id_, const std::vector &natmol2_, const std::vector &density_bins_, const std::vector &inv_num_mol_, std::vector> &frame_same_mutex_, std::vector>>> &intram_mat_density_ @@ -41,7 +41,7 @@ void intra_mol_routine( { std::size_t same_mutex_index = cmdata::indexing::mutex_access(mol_id_[i], a_i, a_j, natmol2_); std::unique_lock lock(frame_same_mutex_[mol_id_[i]][same_mutex_index]); - kernel_density_estimator(std::begin(intram_mat_density_[mol_id_[i]][a_i][a_j]), density_bins_, std::sqrt(dx2), weight*inv_num_mol_[i]/nsym); + kernel_density_estimator(std::begin(intram_mat_density_[mol_id_[i]][a_i][a_j]), density_bins_, std::sqrt(dx2), weight*inv_num_mol_[i]); lock.unlock(); } @@ -90,4 +90,4 @@ void normalize_histo( } // namespace cmdata::density -#endif // _CMDATA_DENSIY_HPP \ No newline at end of file +#endif // _CMDATA_DENSIY_HPP diff --git a/tools/cmdata/src/cmdata/io.hpp b/tools/cmdata/src/cmdata/io.hpp index 90a37849..f9603dba 100644 --- a/tools/cmdata/src/cmdata/io.hpp +++ b/tools/cmdata/src/cmdata/io.hpp @@ -112,80 +112,6 @@ void mtopGetAtomAndResidueName(const gmx_mtop_t& mtop, namespace cmdata::io { -void read_symmetry_indices( - const std::string &path, - const gmx_mtop_t *top, - std::vector>> &eq_list, - const std::vector &natmol2_, - const std::vector &start_index - ) -{ - - eq_list.resize(natmol2_.size()); - for (std::size_t i = 0; i < natmol2_.size(); i++) { - eq_list[i].resize(natmol2_[i]); - for (int ii = 0; ii < natmol2_[i]; ii++) - { - eq_list[i][ii].push_back(ii); - } - } - - std::ifstream infile(path); - if(path!=""&&!infile.good()) - { - std::string errorMessage = "Cannot find the indicated symmetry file"; - throw std::runtime_error(errorMessage.c_str()); - } - - int molb = 0; - std::string residue_entry, atom_entry_i, atom_entry_j, left; - std::string line; - - while (std::getline(infile, line)) - { - std::istringstream iss(line); - if (!(iss >> residue_entry >> atom_entry_i >> atom_entry_j)) // each necessary field is there - { - if (line=="") continue; - printf("Skipping line\n%s\n due to syntax non-conformity\n", line.c_str()); - continue; - } - if((iss >> left)) printf("Found a problem while reading the symmestry file: %s\n This element is ignored.\n Multiple equivament atoms should be set listing the relevant combinations\n", left.c_str()); - - const char *atom_name_i, *atom_name_j, *residue_name_i, *residue_name_j; - int a_i, a_j, resn_i, resn_j; - for (std::size_t i = 0; i < natmol2_.size(); i++) - { - a_i = 0; - for (int ii = start_index[i]; ii < start_index[i]+natmol2_[i]; ii++) - { - mtopGetAtomAndResidueName(*top, ii, &molb, &atom_name_i, &resn_i, &residue_name_i, nullptr); - a_j = 0; - for (int jj = start_index[i]; jj < start_index[i]+natmol2_[i]; jj++) - { - mtopGetAtomAndResidueName(*top, jj, &molb, &atom_name_j, &resn_j, &residue_name_j, nullptr); - if (((atom_name_i==atom_entry_i&&atom_name_j==atom_entry_j)||(atom_name_i==atom_entry_j&&atom_name_j==atom_entry_i))&&residue_entry==residue_name_i&&resn_i==resn_j) - { - bool insert = true; - // check if element is already inserted - for ( auto e : eq_list[i][a_i] ) - { - if (e==a_j) insert = false; - } - // insert if not yet present - if (insert) { - eq_list[i][a_i].push_back(a_j); - } - } - a_j++; - } - a_i++; - } - } - } -} - - std::vector read_weights_file( const std::string &path ) { std::ifstream infile(path);