Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cmdata and make_mat HDF5 format #535

Merged
merged 2 commits into from
Feb 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions tools/cmdata/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,10 @@ endif()

# find gromacs
find_package(GROMACS REQUIRED NAMES gromacs gromacs_mpi gromacs_d gromacs_mpi_d HINTS "$ENV{GROMACS_DIR}")
find_package(HDF5 REQUIRED COMPONENTS C CXX)

# include source and header files
target_include_directories(${CMDATA} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src)
target_include_directories(${CMDATA} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src ${HDF5_INCLUDE_DIRS})

include(FetchContent)
SET(FETCHCONTENT_QUIET OFF)
Expand All @@ -50,7 +51,7 @@ FetchContent_Declare(
FetchContent_MakeAvailable(xdrfile)

# link libraries
target_link_libraries(${CMDATA} PRIVATE Gromacs::libgromacs xdrfile popt)
target_link_libraries(${CMDATA} PRIVATE Gromacs::libgromacs xdrfile popt ${HDF5_CXX_LIBRARIES})

set_target_properties(${PROJECT_NAME} PROPERTIES INSTALL_RPATH ${CMAKE_INSTALL_PREFIX}/lib)
install(TARGETS ${CMDATA} DESTINATION ${CMAKE_INSTALL_PREFIX}/bin)
Expand Down
6 changes: 5 additions & 1 deletion tools/cmdata/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,10 @@ int main(int argc, const char** argv)
std::string out_prefix;
int *p_nopbc = NULL;
int *p_res = NULL;
int *p_h5 = NULL;
bool nopbc = false;
bool res = false;
bool h5 = false;

// make popt options
struct poptOption optionsTable[] = {
Expand All @@ -44,6 +46,7 @@ int main(int argc, const char** argv)
{"mode", '\0', POPT_ARG_STRING | POPT_ARGFLAG_OPTIONAL, &p_mode, 0, "Mode of operation", "STRING"},
{"weights", '\0', POPT_ARG_STRING | POPT_ARGFLAG_OPTIONAL, &p_weights_path, 0, "Weights file", "FILE"},
{"no_pbc", '\0', POPT_ARG_NONE | POPT_ARGFLAG_OPTIONAL, &p_nopbc, 0, "Ignore pbcs", 0},
{"h5", '\0', POPT_ARG_NONE | POPT_ARGFLAG_OPTIONAL, &p_h5 , 0, "Write output in HDF5 format", 0},
POPT_TABLEEND
};

Expand All @@ -70,6 +73,7 @@ int main(int argc, const char** argv)
if ( p_weights_path != NULL ) weights_path = std::string(p_weights_path);
if ( p_out_prefix != NULL ) out_prefix = std::string(p_out_prefix);
if ( p_nopbc != NULL ) nopbc = true;
if ( p_h5 != NULL ) h5 = true;

// check if paths are valid
if ( !std::filesystem::exists(std::filesystem::path(traj_path)) )
Expand Down Expand Up @@ -151,7 +155,7 @@ int main(int argc, const char** argv)

cmdata::CMData cmdata(
top_path, traj_path, cutoff, mol_cutoff, nskip, num_threads, mol_threads, dt,
mode, weights_path, nopbc, t_begin, t_end
mode, weights_path, nopbc, t_begin, t_end, h5
);
cmdata.run();
cmdata.process_data();
Expand Down
117 changes: 68 additions & 49 deletions tools/cmdata/src/cmdata/cmdata.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class CMData
int nskip_;
gmx_mtop_t *mtop_;
rvec *xcm_ = nullptr;
double cutoff_, mol_cutoff_, mcut2_, cut_sig_2_;
float cutoff_, mol_cutoff_, mcut2_, cut_sig_2_;

// molecule number fields
int nindex_;
Expand All @@ -56,13 +56,13 @@ class CMData
std::vector<int> natmol2_;
std::vector<int> mol_id_;
std::vector<int> num_mol_unique_;
std::vector<double> inv_num_mol_unique_;
std::vector<double> inv_num_mol_;
std::vector<float> inv_num_mol_unique_;
std::vector<float> inv_num_mol_;

// weights fields
double weights_sum_;
float weights_sum_;
std::string weights_path_;
std::vector<double> weights_;
std::vector<float> weights_;

// pbc fields
bool no_pbc_;
Expand All @@ -74,21 +74,21 @@ class CMData
XDRFILE *trj_;

// density fields
double dx_;
float dx_;
std::size_t n_bins_;
std::vector<double> density_bins_;
std::vector<float> density_bins_;
std::vector<std::vector<int>> cross_index_;

using cmdata_matrix = std::vector<std::vector<std::vector<std::vector<double>>>>;
using cmdata_matrix = std::vector<std::vector<std::vector<std::vector<float>>>>;
cmdata_matrix interm_same_mat_density_;
cmdata_matrix interm_cross_mat_density_;
cmdata_matrix intram_mat_density_;
cmdata_matrix interm_same_maxcdf_mol_;
cmdata_matrix interm_cross_maxcdf_mol_;

// temporary containers for maxcdf operations
std::vector<std::vector<double>> frame_same_mat_;
std::vector<std::vector<double>> frame_cross_mat_;
std::vector<std::vector<float>> frame_same_mat_;
std::vector<std::vector<float>> frame_cross_mat_;
std::vector<std::vector<std::mutex>> frame_same_mutex_;
std::vector<std::vector<std::mutex>> frame_cross_mutex_;

Expand All @@ -104,6 +104,7 @@ class CMData
// mode selection, booleans and functions
std::string mode_;
bool intra_ = false, same_ = false, cross_ = false;
bool h5_ = false;

// function types
using ftype_intra_ = cmdata::ftypes::function_traits<decltype(&cmdata::density::intra_mol_routine)>;
Expand All @@ -115,15 +116,15 @@ class CMData
std::function<ftype_cross_::signature> f_inter_mol_cross_;

static void molecule_routine(
const int i, const int nindex_, t_pbc *pbc, rvec *x, const std::vector<double> &inv_num_mol_, const double cut_sig_2_,
const int i, const int nindex_, t_pbc *pbc, rvec *x, const std::vector<float> &inv_num_mol_, const float cut_sig_2_,
const std::vector<int> &natmol2_, const std::vector<int> &num_mol_unique_, const std::vector<int> &mol_id_,
const std::vector<std::vector<int>> &cross_index_, const std::vector<double> &density_bins_, const double mcut2_,
const std::vector<std::vector<int>> &cross_index_, const std::vector<float> &density_bins_, const float mcut2_,
rvec *xcm_, const gmx::RangePartitioning &mols_, gmx_mtop_t *mtop_,
std::vector<std::vector<double>> &frame_same_mat_, std::vector<std::vector<std::mutex>> &frame_same_mutex_,
cmdata_matrix &intram_mat_density_, cmdata_matrix &interm_same_mat_density_, std::vector<std::vector<double>> &frame_cross_mat_,
std::vector<std::vector<float>> &frame_same_mat_, std::vector<std::vector<std::mutex>> &frame_same_mutex_,
cmdata_matrix &intram_mat_density_, cmdata_matrix &interm_same_mat_density_, std::vector<std::vector<float>> &frame_cross_mat_,
std::vector<std::vector<std::mutex>> &frame_cross_mutex_, cmdata_matrix &interm_cross_mat_density_, cmdata::parallel::Semaphore &semaphore_,
const std::function<ftype_intra_::signature> &f_intra_mol_, const std::function<ftype_same_::signature> &f_inter_mol_same_,
const std::function<ftype_cross_::signature> &f_inter_mol_cross_, double weight
const std::function<ftype_cross_::signature> &f_inter_mol_cross_, float weight
)
{
semaphore_.acquire();
Expand All @@ -150,7 +151,7 @@ class CMData
rvec dx;
if (pbc != nullptr) pbc_dx(pbc, xcm_[i], xcm_[j], dx);
else rvec_sub(xcm_[i], xcm_[j], dx);
double dx2 = iprod(dx, dx);
float dx2 = iprod(dx, dx);
if (dx2 > mcut2_) continue;
}
/* for molecules of different specie we fill half a matrix */
Expand Down Expand Up @@ -181,7 +182,7 @@ class CMData
rvec sym_dx;
if (pbc != nullptr) pbc_dx(pbc, x[ii], x[jj], sym_dx);
else rvec_sub(x[ii], x[jj], sym_dx);
double dx2 = iprod(sym_dx, sym_dx);
float dx2 = iprod(sym_dx, sym_dx);
if (i==j)
{
if (dx2 < cut_sig_2_)
Expand Down Expand Up @@ -234,12 +235,12 @@ class CMData
public:
CMData(
const std::string &top_path, const std::string &traj_path,
double cutoff, double mol_cutoff, int nskip, int num_threads, int num_mol_threads,
float cutoff, float mol_cutoff, int nskip, int num_threads, int num_mol_threads,
int dt, const std::string &mode, const std::string &weights_path,
bool no_pbc, float t_begin, float t_end
bool no_pbc, float t_begin, float t_end, bool h5
) : cutoff_(cutoff), mol_cutoff_(mol_cutoff), nskip_(nskip), num_threads_(num_threads), num_mol_threads_(num_mol_threads),
mode_(mode), weights_path_(weights_path),
no_pbc_(no_pbc), dt_(dt), t_begin_(t_begin), t_end_(t_end)
no_pbc_(no_pbc), dt_(dt), t_begin_(t_begin), t_end_(t_end), h5_(h5)
{
bool bTop_;
matrix boxtop_;
Expand Down Expand Up @@ -357,11 +358,11 @@ class CMData
for ( auto i : num_mol_unique_ )
{
natmol2_.push_back(mols_.block(molb_index).end() - mols_.block(molb_index).begin());
inv_num_mol_unique_.push_back(1. / static_cast<double>(i));
inv_num_mol_unique_.push_back(1. / static_cast<float>(i));
for ( int j = 0; j < i; j++ )
{
mol_id_.push_back(mol_id);
inv_num_mol_.push_back(1. / static_cast<double>(i));
inv_num_mol_.push_back(1. / static_cast<float>(i));
}
mol_id++;
molb_index += i;
Expand Down Expand Up @@ -403,29 +404,29 @@ class CMData

density_bins_.resize(cmdata::indexing::n_bins(cutoff_));
for (std::size_t i = 0; i < density_bins_.size(); i++)
density_bins_[i] = cutoff_ / static_cast<double>(density_bins_.size()) * static_cast<double>(i) + cutoff_ / static_cast<double>(density_bins_.size() * 2);
density_bins_[i] = cutoff_ / static_cast<float>(density_bins_.size()) * static_cast<float>(i) + cutoff_ / static_cast<float>(density_bins_.size() * 2);

int cross_count = 0;
if (cross_) cross_index_.resize(natmol2_.size(), std::vector<int>(natmol2_.size(), 0));
for ( std::size_t i = 0; i < natmol2_.size(); i++ )
{
if (same_)
{
interm_same_mat_density_[i].resize(natmol2_[i], std::vector<std::vector<double>>(natmol2_[i], std::vector<double>(cmdata::indexing::n_bins(cutoff_), 0)));
interm_same_maxcdf_mol_[i].resize(natmol2_[i], std::vector<std::vector<double>>(natmol2_[i], std::vector<double>(cmdata::indexing::n_bins(cutoff_), 0)));
interm_same_mat_density_[i].resize(natmol2_[i], std::vector<std::vector<float>>(natmol2_[i], std::vector<float>(cmdata::indexing::n_bins(cutoff_), 0)));
interm_same_maxcdf_mol_[i].resize(natmol2_[i], std::vector<std::vector<float>>(natmol2_[i], std::vector<float>(cmdata::indexing::n_bins(cutoff_), 0)));
}
if (intra_) intram_mat_density_[i].resize(natmol2_[i], std::vector<std::vector<double>>(natmol2_[i], std::vector<double>(cmdata::indexing::n_bins(cutoff_), 0)));
if (intra_) intram_mat_density_[i].resize(natmol2_[i], std::vector<std::vector<float>>(natmol2_[i], std::vector<float>(cmdata::indexing::n_bins(cutoff_), 0)));
for ( std::size_t j = i + 1; j < natmol2_.size() && cross_; j++ )
{
interm_cross_mat_density_[cross_count].resize(natmol2_[i], std::vector<std::vector<double>>(natmol2_[j], std::vector<double>(cmdata::indexing::n_bins(cutoff_), 0)));
interm_cross_maxcdf_mol_[cross_count].resize(natmol2_[i], std::vector<std::vector<double>>(natmol2_[j], std::vector<double>(cmdata::indexing::n_bins(cutoff_), 0)));
interm_cross_mat_density_[cross_count].resize(natmol2_[i], std::vector<std::vector<float>>(natmol2_[j], std::vector<float>(cmdata::indexing::n_bins(cutoff_), 0)));
interm_cross_maxcdf_mol_[cross_count].resize(natmol2_[i], std::vector<std::vector<float>>(natmol2_[j], std::vector<float>(cmdata::indexing::n_bins(cutoff_), 0)));
cross_index_[i][j] = cross_count;
cross_count++;
}
}

n_bins_ = cmdata::indexing::n_bins(cutoff_);
dx_ = cutoff_ / static_cast<double>(n_bins_);
dx_ = cutoff_ / static_cast<float>(n_bins_);

mcut2_ = mol_cutoff_ * mol_cutoff_;
cut_sig_2_ = (cutoff_ + 0.02) * (cutoff_ + 0.02);
Expand All @@ -452,7 +453,7 @@ class CMData
printf("Weights file provided. Reading weights from %s\n", weights_path_.c_str());
weights_ = cmdata::io::read_weights_file(weights_path_);
printf("Found %li frame weights in file\n", weights_.size());
double w_sum = std::accumulate(std::begin(weights_), std::end(weights_), 0.0, std::plus<>());
float w_sum = std::accumulate(std::begin(weights_), std::end(weights_), 0.0, std::plus<>());
printf("Sum of weights amounts to %lf\n", w_sum);
weights_sum_ = 0.;
}
Expand Down Expand Up @@ -602,7 +603,7 @@ class CMData
start_j_cross = end_j_cross;
}

printf("Finished preprocessing. Starting frame-by-frame analysis.\n");
printf("Finished preprocessing.\nStarting frame-by-frame analysis.\n");
}

void run()
Expand All @@ -622,7 +623,7 @@ class CMData
if ((frame_->time >= t_begin_ && (t_end_ < 0 || frame_->time <= t_end_ )) && // within time borders
( dt_ == 0 || std::fmod(frame_->time, dt_) == 0) && (nskip_ == 0 || std::fmod(frnr, nskip_) == 0)) // skip frames
{
double weight = 1.0;
float weight = 1.0;
if (!weights_.empty())
{
weight = weights_[frnr];
Expand All @@ -643,7 +644,7 @@ class CMData
for (int i = 0; i < nindex_; i++)
{
clear_rvec(xcm_[i]);
double tm = 0.;
float tm = 0.;
for (int ii = mols_.block(i).begin(); ii < mols_.block(i).end(); ii++)
{
for (int m = 0; (m < DIM); m++)
Expand Down Expand Up @@ -693,11 +694,11 @@ class CMData

void process_data()
{
std::cout << "Finished frame-by-frame analysis\n";
std::cout << "\nFinished frame-by-frame analysis\n";
std::cout << "Analyzed " << n_x_ << " frames\n";
std::cout << "Normalizing data... " << std::endl;
// normalisations
double norm = ( weights_.empty() ) ? 1. / n_x_ : 1. / weights_sum_;
float norm = ( weights_.empty() ) ? 1. / n_x_ : 1. / weights_sum_;

using ftype_norm = cmdata::ftypes::function_traits<decltype(&cmdata::density::normalize_histo)>;
std::function<ftype_norm::signature> f_empty = cmdata::ftypes::do_nothing<ftype_norm>();
Expand All @@ -712,12 +713,12 @@ class CMData
{
for (int jj = ii; jj < natmol2_[i]; jj++)
{
double inv_num_mol_same = inv_num_mol_unique_[i];
float inv_num_mol_same = inv_num_mol_unique_[i];
normalize_inter_same(i, ii, jj, norm, inv_num_mol_same, interm_same_maxcdf_mol_);
normalize_inter_same(i, ii, jj, norm, 1.0, interm_same_mat_density_);
normalize_intra(i, ii, jj, norm, 1.0, intram_mat_density_);

double sum = 0.0;
float sum = 0.0;
for ( std::size_t k = (same_) ? 0 : cmdata::indexing::n_bins(cutoff_); k < cmdata::indexing::n_bins(cutoff_); k++ )
{
sum+= dx_ * interm_same_maxcdf_mol_[i][ii][jj][k];
Expand All @@ -735,11 +736,11 @@ class CMData
{
for (int jj = 0; jj < natmol2_[j]; jj++)
{
double inv_num_mol_cross = inv_num_mol_unique_[i];
float inv_num_mol_cross = inv_num_mol_unique_[i];
normalize_inter_cross(cross_index_[i][j], ii, jj, norm, 1.0, interm_cross_mat_density_);
normalize_inter_cross(cross_index_[i][j], ii, jj, norm, inv_num_mol_cross, interm_cross_maxcdf_mol_);

double sum = 0.0;
float sum = 0.0;
for ( std::size_t k = (cross_) ? 0 : cmdata::indexing::n_bins(cutoff_); k < cmdata::indexing::n_bins(cutoff_); k++ )
{
sum += dx_ * interm_cross_maxcdf_mol_[cross_index_[i][j]][ii][jj][k];
Expand All @@ -755,21 +756,35 @@ class CMData
void write_output( const std::string &output_prefix )
{
std::cout << "Writing data... " << std::endl;
using ftype_write_intra = cmdata::ftypes::function_traits<decltype(&cmdata::io::f_write_intra)>;
using ftype_write_inter_same = cmdata::ftypes::function_traits<decltype(&cmdata::io::f_write_inter_same)>;
using ftype_write_inter_cross = cmdata::ftypes::function_traits<decltype(&cmdata::io::f_write_inter_cross)>;
using ftype_write_intra = cmdata::ftypes::function_traits<decltype(&cmdata::io::f_write_intra_HDF5)>;
using ftype_write_inter_same = cmdata::ftypes::function_traits<decltype(&cmdata::io::f_write_inter_same_HDF5)>;
using ftype_write_inter_cross = cmdata::ftypes::function_traits<decltype(&cmdata::io::f_write_inter_cross_HDF5)>;
std::function<ftype_write_intra::signature> write_intra = cmdata::ftypes::do_nothing<ftype_write_intra>();
std::function<ftype_write_inter_same::signature> write_inter_same = cmdata::ftypes::do_nothing<ftype_write_inter_same>();
std::function<ftype_write_inter_cross::signature> write_inter_cross = cmdata::ftypes::do_nothing<ftype_write_inter_cross>();
if (intra_) write_intra = cmdata::io::f_write_intra;
if (same_) write_inter_same = cmdata::io::f_write_inter_same;
if (cross_) write_inter_cross = cmdata::io::f_write_inter_cross;

if (intra_)
{
if(h5_) write_intra = cmdata::io::f_write_intra_HDF5;
else write_intra = cmdata::io::f_write_intra;
}
if (same_)
{
if(h5_) write_inter_same = cmdata::io::f_write_inter_same_HDF5;
else write_inter_same = cmdata::io::f_write_inter_same;
}
if (cross_)
{
if(h5_) write_inter_cross = cmdata::io::f_write_inter_cross_HDF5;
else write_inter_cross = cmdata::io::f_write_inter_cross;
}

for (std::size_t i = 0; i < natmol2_.size(); i++)
{
std::cout << "Writing data for molecule " << i << "..." << std::endl;
cmdata::io::print_progress_bar(0.0);
float progress = 0.0, new_progress = 0.0;
float progress = 0.0, new_progress = 0.0;

for (int ii = 0; ii < natmol2_[i]; ii++)
{
new_progress = static_cast<float>(ii) / static_cast<float>(natmol2_[i]);
Expand All @@ -781,14 +796,18 @@ class CMData
write_intra(output_prefix, i, ii, density_bins_, natmol2_, intram_mat_density_);
write_inter_same(output_prefix, i, ii, density_bins_, natmol2_, interm_same_mat_density_, interm_same_maxcdf_mol_);
}
for (std::size_t j = i + 1; j < natmol2_.size(); j++)
if(cross_)
{
for (int ii = 0; ii < natmol2_[i]; ii++)
for (std::size_t j = i + 1; j < natmol2_.size(); j++)
{
write_inter_cross(output_prefix, i, j, ii, density_bins_, natmol2_, cross_index_, interm_cross_mat_density_, interm_cross_maxcdf_mol_);
for (int ii = 0; ii < natmol2_[i]; ii++)
{
write_inter_cross(output_prefix, i, j, ii, density_bins_, natmol2_, cross_index_, interm_cross_mat_density_, interm_cross_maxcdf_mol_);
}
}
}
}

cmdata::io::print_progress_bar(1.0);
std::cout << "\nFinished!" << std::endl;
}
Expand Down
Loading
Loading