diff --git a/tools/cmdata/src/cmdata/cmdata.hpp b/tools/cmdata/src/cmdata/cmdata.hpp index 7e7527e0..18ce8ba3 100644 --- a/tools/cmdata/src/cmdata/cmdata.hpp +++ b/tools/cmdata/src/cmdata/cmdata.hpp @@ -375,7 +375,11 @@ class CMData } if(!check_same && same_) same_ = false; if(natmol2_.size()<2) cross_ = false; - + if(nindex_>1 && (same_ || cross_)) + { + printf("\n\n::::::::::::WARNING::::::::::::\nMore than 1 molcule found in the system.\nFix pbc before running cmdata using pbc mol\n"); + printf(":::::::::::::::::::::::::::::::\n\n"); + } if (same_) { f_inter_mol_same_ = cmdata::density::inter_mol_same_routine; diff --git a/tools/resdata/.gitignore b/tools/resdata/.gitignore new file mode 100644 index 00000000..55fae157 --- /dev/null +++ b/tools/resdata/.gitignore @@ -0,0 +1,2 @@ +build/ +exe/ diff --git a/tools/resdata/CMakeLists.txt b/tools/resdata/CMakeLists.txt new file mode 100644 index 00000000..ff6eb8fd --- /dev/null +++ b/tools/resdata/CMakeLists.txt @@ -0,0 +1,62 @@ +cmake_minimum_required(VERSION 3.16) + +project(resdata VERSION 0.1 + DESCRIPTION "A programm to calculate contact data from gromacs trajectories for multi-eGO" + LANGUAGES CXX) + +set(CMAKE_CXX_STANDARD 17) +set(RESDATA resdata) + +set(CMAKE_RPATH "${CMAKE_INSTALL_PREFIX}/lib") +set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib") +set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) + +add_executable(${RESDATA} main.cpp) + +# set build type to release if not set (check if its debug, Debug, or DEBUG) +if(CMAKE_BUILD_TYPE MATCHES "Debug") + set(CMAKE_BUILD_TYPE Debug) + set(CMAKE_CXX_FLAGS_DEBUG "-g") +elseif(CMAKE_BUILD_TYPE MATCHES "Release" OR NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Release) + set(CMAKE_CXX_FLAGS "-Wall -Wextra -march=native -Wno-unused-parameter") + set(CMAKE_CXX_FLAGS_RELEASE "-O3") +endif() + +# find gromacs +find_package(GROMACS REQUIRED NAMES gromacs gromacs_mpi gromacs_d gromacs_mpi_d HINTS "$ENV{GROMACS_DIR}") + +# include source and header files +target_include_directories(${RESDATA} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src) + +include(FetchContent) +SET(FETCHCONTENT_QUIET OFF) +SET(FETCHCONTENT_BASE_DIR ${CMAKE_CURRENT_BINARY_DIR}/resdata_fetch) + +message(STATUS "Fetching popt") +FetchContent_Declare( + popt + URL https://github.com/rpm-software-management/popt/archive/refs/heads/master.zip + GIT_TAG 2bca0aa + PATCH_COMMAND patch --directory=${FETCHCONTENT_BASE_DIR}/popt-src -p0 < ${CMAKE_CURRENT_SOURCE_DIR}/popt.patch +) +FetchContent_MakeAvailable(popt) + +message(STATUS "Fetching xdrfile") +FetchContent_Declare( + xdrfile + URL https://github.com/multi-ego/xdrfile/archive/refs/heads/chemfiles.zip +) +FetchContent_MakeAvailable(xdrfile) + +# link libraries +target_link_libraries(${RESDATA} PRIVATE Gromacs::libgromacs xdrfile popt) + +set_target_properties(${PROJECT_NAME} PROPERTIES INSTALL_RPATH ${CMAKE_INSTALL_PREFIX}/lib) +install(TARGETS ${RESDATA} DESTINATION ${CMAKE_INSTALL_PREFIX}/bin) + +# build test +if(RESDATA_BUILD_TESTS) + enable_testing() + add_subdirectory(test/) +endif() diff --git a/tools/resdata/README.md b/tools/resdata/README.md new file mode 100644 index 00000000..df690a74 --- /dev/null +++ b/tools/resdata/README.md @@ -0,0 +1,36 @@ +# resdata + +`resdata` is a tool that calculates residue wise interatomic distances and contact probabilities probabilities accounting for symmetric molecules . It works using the GROMACS API. + +## Installation + +To install it you need to compile and install GROMACS (2023/2024) using `-DGMX_INSTALL_LEGACY_API=ON` `-DBUILD_SHARED_LIBS=ON` +`-DGMX_INSTALL_NBLIB_API=ON`. Then in a local subfolder (e.g., `build`, run `cmake ..` then `make` and `make install`). + +## Usage +resdata calculates a residue wise contact and distance matrix for complex system. +It can Calculate block averages of probabilities +It takes in input an index (only hole molecules can be removed otherwise an error is raised) + +``` +Usage: cmdata [OPTION...] + -f, --traj=FILE Trajectory file + -s, --top=FILE Topology file + -b, --t_begin[=FLOAT] Start time + -e, --t_end[=FLOAT] End time + -o, --out[=STRING] Output prefix + -n, --index[=STRING] Index file + --dt[=INT] Time step + --cutoff[=DOUBLE] Cutoff distance + --mol_cutoff[=DOUBLE] Molecule cutoff distance + --nskip[=INT] Number of frames to skip + --num_threads[=INT] Number of threads + --mode[=STRING] Mode of operation + --weights[=FILE] Weights file + --no_pbc Ignore pbcs + --blk_avg Calculates block average for probabili + +Help options: + -?, --help Show this help message + --usage Display brief usage message +``` diff --git a/tools/resdata/main.cpp b/tools/resdata/main.cpp new file mode 100644 index 00000000..3e07ac6a --- /dev/null +++ b/tools/resdata/main.cpp @@ -0,0 +1,169 @@ +// standard library imports +#include +#include +#include +// cmdata import +#include "src/resdata/resdata.hpp" +// external library import +#include + +int main(int argc, const char** argv) +{ + std::cout << "################################################" << std::endl; + std::cout << "#################### RESDATA ####################" << std::endl; + std::cout << "################################################" << std::endl; + std::cout << "################## Version 0 ################" << std::endl; + std::cout << "################################################\n" << std::endl; + + double cutoff = 0.55, mol_cutoff = 6.0; + int nskip = 0, num_threads = 1, mol_threads = -1, dt = 0; + float t_begin = 0.0, t_end = -1.0; + char *p_traj_path = NULL, *p_top_path = NULL, *p_mode = NULL, *p_weights_path = NULL, *p_index_path = NULL; + char *p_out_prefix = NULL; + std::string traj_path, top_path, mode, weights_path, index_path; + std::string out_prefix; + int *p_nopbc = NULL; + int *p_blk_avg = NULL; + bool nopbc = false; + bool blk_avg = false; + + // make popt options + struct poptOption optionsTable[] = { + POPT_AUTOHELP + {"traj", 'f', POPT_ARG_STRING, &p_traj_path, 0, "Trajectory file", "FILE"}, + {"top", 's', POPT_ARG_STRING, &p_top_path, 0, "Topology file", "FILE"}, + {"t_begin", 'b', POPT_ARG_FLOAT | POPT_ARGFLAG_OPTIONAL, &t_begin, 0, "Start time", "FLOAT"}, + {"t_end", 'e', POPT_ARG_FLOAT | POPT_ARGFLAG_OPTIONAL, &t_end, 0, "End time", "FLOAT"}, + {"out", 'o', POPT_ARG_STRING | POPT_ARGFLAG_OPTIONAL, &p_out_prefix, 0, "Output prefix", "STRING"}, + {"index", 'n', POPT_ARG_STRING | POPT_ARGFLAG_OPTIONAL, &p_index_path, 0, "Index file ", "FILE"}, + {"dt", '\0', POPT_ARG_INT | POPT_ARGFLAG_OPTIONAL, &dt, 0, "Time step", "INT"}, + {"cutoff", '\0', POPT_ARG_DOUBLE | POPT_ARGFLAG_OPTIONAL, &cutoff, 0, "Cutoff distance", "DOUBLE"}, + {"mol_cutoff", '\0', POPT_ARG_DOUBLE | POPT_ARGFLAG_OPTIONAL, &mol_cutoff, 0, "Molecule cutoff distance", "DOUBLE"}, + {"nskip", '\0', POPT_ARG_INT | POPT_ARGFLAG_OPTIONAL, &nskip, 0, "Number of frames to skip", "INT"}, + {"num_threads", '\0', POPT_ARG_INT | POPT_ARGFLAG_OPTIONAL, &num_threads, 0, "Number of threads", "INT"}, + {"mol_threads", '\0', POPT_ARG_INT | POPT_ARGFLAG_OPTIONAL, &mol_threads, 0, "Number of molecule threads", "INT"}, + {"mode", '\0', POPT_ARG_STRING | POPT_ARGFLAG_OPTIONAL, &p_mode, 0, "Mode of operation", "STRING"}, + {"weights", '\0', POPT_ARG_STRING | POPT_ARGFLAG_OPTIONAL, &p_weights_path, 0, "Weights file", "FILE"}, + {"no_pbc", '\0', POPT_ARG_NONE | POPT_ARGFLAG_OPTIONAL, &p_nopbc, 0, "Ignore pbcs", 0}, + {"blk_avg", '\0', POPT_ARG_NONE | POPT_ARGFLAG_OPTIONAL, &p_blk_avg, 0, "Block Average", 0}, + POPT_TABLEEND + }; + + // parse options + poptContext opt_context = poptGetContext("resdata", argc, argv, optionsTable, 0); + int opt=poptGetNextOpt(opt_context); // needs to be run to parse + if (opt < -1) { + /* Handle error condition */ + fprintf(stderr, "%s: %s\n", poptBadOption(opt_context, POPT_BADOPTION_NOALIAS), poptStrerror(opt)); + return 1; + } + poptFreeContext(opt_context); + + // check if traj and top are set + if ( !(p_traj_path && p_top_path) ) + { + std::cerr << "Trajectory and topology files must be set!" << std::endl; + return 1; + } + + traj_path = std::string(p_traj_path); + top_path = std::string(p_top_path); + if ( p_index_path != NULL ) index_path = std::string(p_index_path); + mode = p_mode ? std::string(p_mode) : std::string("intra+same+cross"); + if ( p_weights_path != NULL ) weights_path = std::string(p_weights_path); + if ( p_out_prefix != NULL ) out_prefix = std::string(p_out_prefix); + if ( p_nopbc != NULL ) nopbc = true; + if ( p_blk_avg != NULL ) blk_avg = true; + // check if paths are valid + if ( !std::filesystem::exists(std::filesystem::path(traj_path)) ) + { + std::cerr << "Trajectory file does not exist!" << std::endl; + return 1; + } + if ( !std::filesystem::exists(std::filesystem::path(top_path)) ) + { + std::cerr << "Topology file does not exist!" << std::endl; + return 2; + } + if ( !weights_path.empty() && !std::filesystem::exists(std::filesystem::path(weights_path)) ) + { + std::cerr << "Weights file does not exist!" << std::endl; + return 3; + } + if ( !index_path.empty() &&!std::filesystem::exists(std::filesystem::path(index_path)) ) + { + std::cerr << "Index file does not exist!" << std::endl; + return 4; + } + // if ( !out_prefix.empty() ) + // { + // bool created = std::filesystem::create_directories(std::filesystem::path(out_prefix)); + // if ( !created ) // if not created + // { + // std::cout << "Could not create output directory at " << out_prefix << std::endl; + // if ( std::filesystem::exists(std::filesystem::path(out_prefix)) ) // already exists + // { + // std::cout << "Reason: directory already exists! WARNING: Files might be overwritten!" << std::endl; + // } + // else if ( !std::filesystem::is_directory(std::filesystem::path(out_prefix)) ) // not a directory (file or non-existent path) + // { + // std::cout << "Reason: path is not a directory!" << std::endl; + // } + // else if ( !std::filesystem::exists(std::filesystem::path(out_prefix)) ) // does not exist (no permissions or non-existent parent directory) + // { + // std::cout << "Reason: could not create directory!" << std::endl; + // return 5; + // } + // } + // } + if ( num_threads < 1 ) + { + std::cerr << "Number of threads must be at least 1!" << std::endl; + return 6; + } + if ( mol_threads < 1 ) + { + std::cout << "Setting molecule threads to number of threads!" << std::endl; + mol_threads = num_threads; + } + if ( dt < 0 ) + { + std::cerr << "Time step must be a positive number!" << std::endl; + return 7; + } + if ( nskip < 0 ) + { + std::cerr << "Number of frames to skip must be at least 0!" << std::endl; + return 8; + } + if ( cutoff <= 0.0 ) + { + std::cerr << "Cutoff distance must be greater than 0!" << std::endl; + return 9; + } + if ( mol_cutoff <= 0.0 ) + { + std::cerr << "Molecule cutoff distance must be greater than 0!" << std::endl; + return 10; + } + if ( t_begin < 0.0 ) + { + std::cerr << "Start time must be at least 0!" << std::endl; + return 11; + } + if ( t_end < t_begin && t_end != -1.0 ) + { + std::cerr << "End time must be greater than start time!" << std::endl; + return 12; + } + + resdata::RESData resdata( + top_path, traj_path, cutoff, mol_cutoff, nskip, num_threads, mol_threads, dt, + mode, weights_path, nopbc, t_begin, t_end, index_path, blk_avg + ); + resdata.run(); + // cmdata.process_data(); + resdata.write_output(out_prefix); + + return 0; +} diff --git a/tools/resdata/popt.patch b/tools/resdata/popt.patch new file mode 100644 index 00000000..404cf161 --- /dev/null +++ b/tools/resdata/popt.patch @@ -0,0 +1,42 @@ +diff --color -Naur CMakeLists.txt.origCMakeLists.txt +--- CMakeLists.txt 2024-03-14 16:24:38.310420219 +0100 ++++ CMakeLists.txt 2024-03-14 16:25:42.162353238 +0100 +@@ -103,12 +103,6 @@ + + + add_subdirectory(src) +-add_subdirectory(doc) + if (EXISTS po/popt.pot) + add_subdirectory(po) + endif() +- +-# Enable testing +-include(CTest) +-enable_testing() +-add_subdirectory(tests) +diff --color -Naur src/CMakeLists.txt.orig src/CMakeLists.txt +--- src/CMakeLists.txt 2024-03-14 16:23:52.010463824 +0100 ++++ src/CMakeLists.txt 2024-03-14 16:26:29.646298783 +0100 +@@ -25,13 +25,15 @@ + endif() + + set_target_properties(popt PROPERTIES +- VERSION ${PROJECT_VERSION} +- SOVERSION ${POPT_SOVERSION} +- C_STANDARD 99 +- C_STANDARD_REQUIRED ON +- C_EXTENSIONS ON +- PUBLIC_HEADER popt.h +- LINK_FLAGS "-Wl,--no-undefined -Wl,--version-script,\"${PROJECT_SOURCE_DIR}/src/libpopt.vers\"" ++ VERSION ${PROJECT_VERSION} ++ SOVERSION ${POPT_SOVERSION} ++ C_STANDARD 99 ++ C_STANDARD_REQUIRED ON ++ C_EXTENSIONS ON ++ PUBLIC_HEADER popt.h ++ if(not APPLE) ++ LINK_FLAGS "-Wl,--no-undefined -Wl,--version-script,\"${PROJECT_SOURCE_DIR}/src/libpopt.vers\"" ++ endif() + ) + + # Install the library diff --git a/tools/resdata/src/resdata/block_avg.hpp b/tools/resdata/src/resdata/block_avg.hpp new file mode 100644 index 00000000..5a5e075d --- /dev/null +++ b/tools/resdata/src/resdata/block_avg.hpp @@ -0,0 +1,210 @@ +#ifndef _RESDATA_BLOCKAVG_HPP +#define _RESDATA_BLOCKAVG_HPP + +#include +#include +#include + +#include "indexing.hpp" + +namespace resdata::blockavg +{ + +void AccumulateInterCrossBlock( + const int i,const int n_x_,const std::vector &mol_id_, const std::vector> &cross_index_, const std::vector &natmol2_, + std::vector>>> &block_resmat_cross_p_,std::vector>>> &block_resmat_cross_p2_,std::vector>>> &block_resmat_cross_p_temp, std::vector>>> &block_counter_cross, + const std::vector &block_sizes_,std::vector>>> &appo_cross_p, + const std::vector &num_mol_unique_ +) +{ + for(int jk=i+1; jk < natmol2_.size() ; jk++) + { + for(int i_bk = 0; i_bk0.) + { + appo+=1; + } + } + if(n_x_>0 && appo>0)block_resmat_cross_p_temp[cross_index_[i][jk]][i_bk][res_ik][res_jk] += 1.; + if(std::fmod(n_x_, block_sizes_[i_bk]) == 0 && n_x_ > 0) + { + double a = block_resmat_cross_p_temp[cross_index_[i][jk]][i_bk][res_ik][res_jk]/block_sizes_[i_bk]; + + block_resmat_cross_p_[cross_index_[i][jk]][i_bk][res_ik][res_jk] += a; + block_resmat_cross_p2_[cross_index_[i][jk]][i_bk][res_ik][res_jk] += a*a; + + block_resmat_cross_p_temp[cross_index_[i][jk]][i_bk][res_ik][res_jk] = 0.; + block_counter_cross[cross_index_[i][jk]][i_bk][res_ik][res_jk]+=1; + } + } + } + } + } +} + + +void AccumulateInterSameBlock( + const int i,const int n_x_,const std::vector &mol_id_, const std::vector &natmol2_, + std::vector>>> &block_resmat_same_p_,std::vector>>> &block_resmat_same_p2_,std::vector>>> &block_resmat_same_p_temp, std::vector>>> &block_counter_same, + const std::vector &block_sizes_,std::vector>>> &appo_same_p, + const std::vector &num_mol_unique_ +) +{ + for(int i_bk = 0; i_bk0.) + { + if(n_x_>0)block_resmat_same_p_temp[i][i_bk][res_i][res_j] += appo_same_p[i][im][res_i][res_j]; + if(std::fmod(n_x_, block_sizes_[i_bk]) == 0 && n_x_ > 0 && i+1==num_mol_unique_[i]) + { + double a = block_resmat_same_p_temp[i][i_bk][res_i][res_j] / (block_sizes_[i_bk]*num_mol_unique_[i]); + + block_resmat_same_p_[i][i_bk][res_i][res_j] += a; + block_resmat_same_p2_[i][i_bk][res_i][res_j] += a*a; + + block_resmat_same_p_temp[i][i_bk][res_i][res_j] = 0.; + block_counter_same[i][i_bk][res_i][res_j]+=1; + } + } + } + } + } + } +} + +void AccumulateIntraBlock( + const int i,const int n_x_,const std::vector &mol_id_, const std::vector &natmol2_, + std::vector>>> &block_resmat_intra_p_,std::vector>>> &block_resmat_intra_p2_,std::vector>>> &block_resmat_intra_p_temp, std::vector>>> &block_counter_intra, + const std::vector &block_sizes_,std::vector>>> &appo_intra_p, + const std::vector &num_mol_unique_ + +) +{ + for(int i_bk = 0; i_bk0.) + { + if(n_x_>0)block_resmat_intra_p_temp[i][i_bk][res_i][res_j] += appo_intra_p[i][im][res_i][res_j]; + if(std::fmod(n_x_, block_sizes_[i_bk]) == 0 && n_x_ > 0 && i+1==num_mol_unique_[i]) + { + double a = block_resmat_intra_p_temp[i][i_bk][res_i][res_j] / (block_sizes_[i_bk]*num_mol_unique_[i]); + + block_resmat_intra_p_[i][i_bk][res_i][res_j] += a; + block_resmat_intra_p2_[i][i_bk][res_i][res_j] += a*a; + + block_resmat_intra_p_temp[i][i_bk][res_i][res_j] = 0.; + block_counter_intra[i][i_bk][res_i][res_j]+=1; + } + } + } + } + } + } +} + + + + +void NormilizeInterCrossBlock( + std::vector>>> &block_resmat_cross_std_, std::vector>>> &block_resmat_cross_p_, + std::vector>>> &block_resmat_cross_p2_,std::vector>>> &block_counter_cross +) +{ + for(int i=0; i>>> &block_resmat_same_std_, std::vector>>> &block_resmat_same_p_, + std::vector>>> &block_resmat_same_p2_,std::vector>>> &block_counter_same, + const std::vector &num_mol_unique_ +) +{ + for(int i=0; i0) + { + block_resmat_same_p_[i][i_bk][res_i][res_j]/=block_counter_same[i][i_bk][res_i][res_j]; + block_resmat_same_p2_[i][i_bk][res_i][res_j]/=block_counter_same[i][i_bk][res_i][res_j]; + block_resmat_same_std_[i][i_bk][res_i][res_j]=std::sqrt((block_resmat_same_p2_[i][i_bk][res_i][res_j]-block_resmat_same_p_[i][i_bk][res_i][res_j]*block_resmat_same_p_[i][i_bk][res_i][res_j])/block_counter_same[i][i_bk][res_i][res_j]); + } + else block_resmat_same_std_[i][i_bk][res_i][res_j]=0.; + + } + } + } + } +} + +void NormilizeIntraBlock( + std::vector>>> &block_resmat_intra_std_, std::vector>>> &block_resmat_intra_p_, + std::vector>>> &block_resmat_intra_p2_,std::vector>>> &block_counter_intra, + const std::vector &num_mol_unique_ +) +{ + for(int i=0; i0) + { + block_resmat_intra_p_[i][i_bk][res_i][res_j]/=block_counter_intra[i][i_bk][res_i][res_j]; + block_resmat_intra_p2_[i][i_bk][res_i][res_j]/=block_counter_intra[i][i_bk][res_i][res_j]; + block_resmat_intra_std_[i][i_bk][res_i][res_j]=std::sqrt((block_resmat_intra_p2_[i][i_bk][res_i][res_j]-block_resmat_intra_p_[i][i_bk][res_i][res_j]*block_resmat_intra_p_[i][i_bk][res_i][res_j])/block_counter_intra[i][i_bk][res_i][res_j]); + } + else block_resmat_intra_std_[i][i_bk][res_i][res_j]=0.; + + } + + } + } + } +} + + +} // namespace resdata::block_avg + +#endif // _RESDATA_BLOCKAVG_HPP diff --git a/tools/resdata/src/resdata/density.hpp b/tools/resdata/src/resdata/density.hpp new file mode 100644 index 00000000..55c0c5fd --- /dev/null +++ b/tools/resdata/src/resdata/density.hpp @@ -0,0 +1,182 @@ +#ifndef _RESDATA_DENSIY_HPP +#define _RESDATA_DENSIY_HPP + +#include +#include +#include + +#include "indexing.hpp" + +namespace resdata::density +{ +//RESIDUE STUFF + +void AccumulateIntra( + const int i,const std::vector &mol_id_, + std::vector>> &resmat_intra_d_, std::vector>> &resmat_intra_p_, + std::vector>>> &appo_intra_d, std::vector>>> &appo_intra_p, + const std::vector &num_mol_unique_ +) +{ + for(int im = 0; im0.) + { + resmat_intra_d_[i][res_i][res_j]+=appo_intra_d[i][im][res_i][res_j]; + resmat_intra_p_[i][res_i][res_j]+=1.; + } + appo_intra_d[i][im][res_i][res_j] = 100.; + appo_intra_p[i][im][res_i][res_j] = 0.; + } + } + } +} + +void AccumulateInterSame( + const int i,const std::vector &mol_id_, + std::vector>> &resmat_same_d_, std::vector>> &resmat_same_p_, + std::vector>>> &appo_same_d, std::vector>>> &appo_same_p, + const std::vector &num_mol_unique_ + +) +{ + for(int im = 0; im0.) + { + resmat_same_d_[i][res_i][res_j]+=appo_same_d[i][im][res_i][res_j]; + resmat_same_p_[i][res_i][res_j]+=1.; + } + appo_same_d[i][im][res_i][res_j] = 100.; + appo_same_p[i][im][res_i][res_j] = 0.; + } + } + } +} + +void AccumulateInterCross( + const int i,const std::vector &mol_id_, const std::vector> &cross_index_, const std::vector &natmol2_, + std::vector>> &resmat_cross_d_, std::vector>> &resmat_cross_p_, + std::vector>>> &appo_cross_d, std::vector>>> &appo_cross_p, + const std::vector &num_mol_unique_ + +) +{ + for(int jk=i+1; jk < natmol2_.size() ; jk++) + { + for(int res_ik=0; res_ik < resmat_cross_p_[cross_index_[i][jk]].size() ; res_ik++) + { + for(int res_jk=0; res_jk < resmat_cross_p_[cross_index_[i][jk]][0].size() ; res_jk++) + { + double d_appo=100.; + double count_appo=0.; + for(int im = 0; im0.) + { + d_appo=std::min(appo_cross_d[cross_index_[i][jk]][im][res_ik][res_jk], d_appo); + count_appo+=1; + } + appo_cross_d[cross_index_[i][jk]][im][res_ik][res_jk] = 100.; + appo_cross_p[cross_index_[i][jk]][im][res_ik][res_jk] = 0.; + } + if(count_appo>0){ + resmat_cross_d_[cross_index_[i][jk]][res_ik][res_jk] += d_appo; + resmat_cross_p_[cross_index_[i][jk]][res_ik][res_jk] += 1.; + } + } + } + } +} + +void NormilizeInterSame( + const int n_x_, + std::vector>> &resmat_same_d_, std::vector>> &resmat_same_p_, const std::vector &num_mol_unique_ +) +{ + for(int i=0; i0.) + { + resmat_same_d_[i][res_i][res_j] = resmat_same_d_[i][res_i][res_j]/resmat_same_p_[i][res_i][res_j];///num_mol_unique_[i]; + resmat_same_p_[i][res_i][res_j] = resmat_same_p_[i][res_i][res_j]/n_x_/num_mol_unique_[i]; + } + else + { + resmat_same_d_[i][res_i][res_j] = 0.; + resmat_same_p_[i][res_i][res_j] = 0.; + } + } + } + } +} + +void NormilizeIntra( + const int n_x_, + std::vector>> &resmat_intra_d_, std::vector>> &resmat_intra_p_, const std::vector &num_mol_unique_ +) +{ + for(int i=0; i0.) + { + resmat_intra_d_[i][res_i][res_j] = resmat_intra_d_[i][res_i][res_j]/resmat_intra_p_[i][res_i][res_j];///num_mol_unique_[i]; + resmat_intra_p_[i][res_i][res_j] = resmat_intra_p_[i][res_i][res_j]/n_x_/num_mol_unique_[i]; + } + else + { + resmat_intra_d_[i][res_i][res_j] = 0.; + resmat_intra_p_[i][res_i][res_j] = 0.; + } + } + } + } +} + +void NormilizeInterCross( + const int n_x_, + std::vector>> &resmat_cross_d_, std::vector>> &resmat_cross_p_ +) +{ + for(int i=0; i0.) + { + resmat_cross_d_[i][res_i][res_j] = resmat_cross_d_[i][res_i][res_j]/resmat_cross_p_[i][res_i][res_j]; + resmat_cross_p_[i][res_i][res_j] = resmat_cross_p_[i][res_i][res_j]/n_x_; + } + else + { + resmat_cross_d_[i][res_i][res_j] = 0.; + resmat_cross_p_[i][res_i][res_j] = 0.; + } + } + } + } +} + + +} // namespace resdata::density + +#endif // _RESDATA_DENSIY_HPP diff --git a/tools/resdata/src/resdata/function_types.hpp b/tools/resdata/src/resdata/function_types.hpp new file mode 100644 index 00000000..97c83ede --- /dev/null +++ b/tools/resdata/src/resdata/function_types.hpp @@ -0,0 +1,24 @@ +#ifndef _RESDATA_FUNCTION_TYPES_HPP +#define _RESDATA_FUNCTION_TYPES_HPP + +namespace resdata::ftypes +{ + template + struct function_traits; + + template + struct function_traits { + using return_type = Ret; + using args_tuple = std::tuple; + // Define a type for the function signature + using signature = Ret(Args...); + }; + + // does nothing while taking the same arguments as the function + template + auto do_nothing() { + return [](auto&&... args) -> typename function_traits::return_type {}; + } +} + +#endif // _RESDATA_FUNCTION_TYPES_HPP \ No newline at end of file diff --git a/tools/resdata/src/resdata/indexing.hpp b/tools/resdata/src/resdata/indexing.hpp new file mode 100644 index 00000000..2f954ed1 --- /dev/null +++ b/tools/resdata/src/resdata/indexing.hpp @@ -0,0 +1,16 @@ +#ifndef _RESDATA_INDEXING_HPP +#define _RESDATA_INDEXING_HPP + +#include + +namespace resdata::indexing +{ + +std::size_t mutex_access( std::size_t i, std::size_t r_i, std::size_t r_j, const std::vector &num_res_per_molecule ) +{ + return r_i * num_res_per_molecule[i] + r_j; +} + +} // namespace resdata::indexing + +#endif // _RESDATA_indexing_HPP \ No newline at end of file diff --git a/tools/resdata/src/resdata/io.hpp b/tools/resdata/src/resdata/io.hpp new file mode 100644 index 00000000..06d6a41b --- /dev/null +++ b/tools/resdata/src/resdata/io.hpp @@ -0,0 +1,606 @@ +#ifndef _RESDATA_IO_HPP +#define _RESDATA_IO_HPP + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define COUT_FLOAT_PREC6 std::fixed << std::setprecision(6) + +static inline void mtopGetMolblockIndex(const gmx_mtop_t& mtop, + int globalAtomIndex, + int* moleculeBlock, + int* moleculeIndex, + int* atomIndexInMolecule) +{ + // GMX_ASSERT(globalAtomIndex >= 0, "The atom index to look up should not be negative"); + // GMX_ASSERT(globalAtomIndex < mtop.natoms, "The atom index to look up should be within range"); + // GMX_ASSERT(moleculeBlock != nullptr, "molBlock can not be NULL"); + // GMX_ASSERT(!mtop.moleculeBlockIndices.empty(), "The moleculeBlockIndices should not be empty"); + // GMX_ASSERT(*moleculeBlock >= 0, + // "The starting molecule block index for the search should not be negative"); + // GMX_ASSERT(*moleculeBlock < gmx::ssize(mtop.moleculeBlockIndices), + // "The starting molecule block index for the search should be within range"); + + /* Search the molecule block index using bisection */ + int molBlock0 = -1; + int molBlock1 = mtop.molblock.size(); + + int globalAtomStart = 0; + while (TRUE) + { + globalAtomStart = mtop.moleculeBlockIndices[*moleculeBlock].globalAtomStart; + if (globalAtomIndex < globalAtomStart) + { + molBlock1 = *moleculeBlock; + } + else if (globalAtomIndex >= mtop.moleculeBlockIndices[*moleculeBlock].globalAtomEnd) + { + molBlock0 = *moleculeBlock; + } + else + { + break; + } + *moleculeBlock = ((molBlock0 + molBlock1 + 1) >> 1); + } + + int molIndex = (globalAtomIndex - globalAtomStart) + / mtop.moleculeBlockIndices[*moleculeBlock].numAtomsPerMolecule; + if (moleculeIndex != nullptr) + { + *moleculeIndex = molIndex; + } + if (atomIndexInMolecule != nullptr) + { + *atomIndexInMolecule = globalAtomIndex - globalAtomStart + - molIndex * mtop.moleculeBlockIndices[*moleculeBlock].numAtomsPerMolecule; + } +} +void mtopGetAtomAndResidueName(const gmx_mtop_t& mtop, + int globalAtomIndex, + int* moleculeBlock, + const char** atomName, + int* residueNumber, + const char** residueName, + int* globalResidueIndex) +{ + int moleculeIndex = 0; + int atomIndexInMolecule = 0; + mtopGetMolblockIndex(mtop, globalAtomIndex, moleculeBlock, &moleculeIndex, &atomIndexInMolecule); + + const gmx_molblock_t& molb = mtop.molblock[*moleculeBlock]; + const t_atoms& atoms = mtop.moltype[molb.type].atoms; + const MoleculeBlockIndices& indices = mtop.moleculeBlockIndices[*moleculeBlock]; + if (atomName != nullptr) + { + *atomName = *(atoms.atomname[atomIndexInMolecule]); + } + if (residueNumber != nullptr) + { + if (atoms.nres > mtop.maxResiduesPerMoleculeToTriggerRenumber()) + { + *residueNumber = atoms.resinfo[atoms.atom[atomIndexInMolecule].resind].nr; + } + else + { + /* Single residue molecule, keep counting */ + *residueNumber = indices.residueNumberStart + moleculeIndex * atoms.nres + + atoms.atom[atomIndexInMolecule].resind; + } + } + if (residueName != nullptr) + { + *residueName = *(atoms.resinfo[atoms.atom[atomIndexInMolecule].resind].name); + } + if (globalResidueIndex != nullptr) + { + *globalResidueIndex = indices.globalResidueStart + moleculeIndex * atoms.nres + + atoms.atom[atomIndexInMolecule].resind; + } +} + +namespace resdata::io +{ + +std::vector read_weights_file( const std::string &path ) +{ + std::ifstream infile(path); + if (!infile.good()) + { + std::string errorMessage = "Cannot find the indicated weights file"; + throw std::runtime_error(errorMessage.c_str()); + } + std::vector w; + + std::string line; + while ( std::getline(infile, line) ) + { + std::string value; + std::istringstream iss(line); + if (line == "") + { + printf("Detected empty line. Skipping...\n"); + continue; + } + iss >> value; + w.push_back(std::stod(value)); + } + + if (w.size() == 0) + { + std::string errorMessage = "The weights file is empty"; + throw std::runtime_error(errorMessage.c_str()); + } + + for ( std::size_t i = 0; i < w.size(); i++ ) + { + if (w[i] < 0) + { + std::string errorMessage = "The weights file contains negative values"; + throw std::runtime_error(errorMessage.c_str()); + } + } + + return w; +} + + +// gmx::RangePartitioning gmx_mtop_molecules(const gmx_mtop_t& mtop) +// { +// gmx::RangePartitioning mols; + +// for (const gmx_molblock_t& molb : mtop.molblock) +// { +// int numAtomsPerMolecule = mtop.moltype[molb.type].atoms.nr; +// for (int mol = 0; mol < molb.nmol; mol++) +// { +// mols.appendBlock(numAtomsPerMolecule); +// } +// } + +// return mols; +// } + +void f_write_intra(const std::string &output_prefix, + std::size_t i, int ii, const std::vector &density_bins, const std::vector &natmol2, + const std::vector>>> &intram_mat_density +) +{ + std::filesystem::path ffh_intra = output_prefix + "intra_mol_" + std::to_string(i + 1) + "_" + std::to_string(i + 1) + "_aa_" + std::to_string(ii + 1) + ".dat"; + std::ofstream fp_intra(ffh_intra); + for ( std::size_t k = 0; k < density_bins.size(); k++ ) + { + fp_intra << COUT_FLOAT_PREC6 << density_bins[k]; + for (int jj = 0; jj < natmol2[i]; jj++) + { + fp_intra << " " << COUT_FLOAT_PREC6 << intram_mat_density[i][ii][jj][k]; + } + fp_intra << "\n"; + } + + fp_intra.close(); +} + +void f_write_inter_same(const std::string &output_prefix, + std::size_t i, int ii, const std::vector &density_bins, const std::vector &natmol2, + const std::vector>>> &interm_same_mat_density, + const std::vector>>> &interm_same_maxcdf_mol +) +{ + std::filesystem::path ffh_inter = output_prefix + "inter_mol_" + std::to_string(i + 1) + "_" + std::to_string(i + 1) + "_aa_" + std::to_string(ii + 1) + ".dat"; + std::ofstream fp_inter(ffh_inter); + std::filesystem::path ffh_inter_cum = output_prefix + "inter_mol_c_" + std::to_string(i + 1) + "_" + std::to_string(i + 1) + "_aa_" + std::to_string(ii + 1) + ".dat"; + std::ofstream fp_inter_cum(ffh_inter_cum); + for ( std::size_t k = 0; k < density_bins.size(); k++ ) + { + fp_inter << COUT_FLOAT_PREC6 << density_bins[k]; + fp_inter_cum << COUT_FLOAT_PREC6 << density_bins[k]; + for (int jj = 0; jj < natmol2[i]; jj++) + { + fp_inter << " " << COUT_FLOAT_PREC6 << interm_same_mat_density[i][ii][jj][k]; + fp_inter_cum << " " << COUT_FLOAT_PREC6 << interm_same_maxcdf_mol[i][ii][jj][k]; + } + fp_inter << "\n"; + fp_inter_cum << "\n"; + } + fp_inter.close(); + fp_inter_cum.close(); +} + +void f_write_inter_cross(const std::string &output_prefix, + std::size_t i, std::size_t j, int ii, const std::vector &density_bins, const std::vector &natmol2, + const std::vector> &cross_index, + const std::vector>>> &interm_cross_mat_density, + const std::vector>>> &interm_cross_maxcdf_mol +) +{ + std::filesystem::path ffh = output_prefix + "inter_mol_" + std::to_string(i + 1) + "_" + std::to_string(j + 1) + "_aa_" + std::to_string(ii + 1) + ".dat"; + std::ofstream fp(ffh); + std::filesystem::path ffh_cum = output_prefix + "inter_mol_c_" + std::to_string(i + 1) + "_" + std::to_string(j + 1) + "_aa_" + std::to_string(ii + 1) + ".dat"; + std::ofstream fp_cum(ffh_cum); + for ( std::size_t k = 0; k < interm_cross_mat_density[cross_index[i][j]][ii][0].size(); k++ ) + { + fp << std::fixed << std::setprecision(7) << density_bins[k]; + fp_cum << std::fixed << std::setprecision(7) << density_bins[k]; + for (int jj = 0; jj < natmol2[j]; jj++) + { + fp << " " << std::fixed << std::setprecision(7) << interm_cross_mat_density[cross_index[i][j]][ii][jj][k]; + fp_cum << " " << std::fixed << std::setprecision(7) << interm_cross_maxcdf_mol[cross_index[i][j]][ii][jj][k]; + } + fp << "\n"; + fp_cum << "\n"; + } + fp.close(); + fp_cum.close(); +} + +void f_write_inter_cross_res(const std::string &output_prefix, + std::size_t i, std::size_t j, const std::vector &num_res_per_molecule, + const std::vector> &cross_index, + const std::vector &residue_indeces_, + const std::vector>> &resmat_cross_d_, + const std::vector>> &resmat_cross_p_ +) +{ + std::filesystem::path ffh = output_prefix + "inter_res_mol_" + std::to_string(i + 1) + "_" + std::to_string(j + 1) + ".dat"; + std::ofstream fp(ffh); + + for ( std::size_t res_i = 0; res_i < num_res_per_molecule[i]; res_i++ ) + { + for ( std::size_t res_j = 0; res_j < num_res_per_molecule[j]; res_j++ ) + { + fp << std::fixed << std::setprecision(7) << i + 1; + fp << " " << std::fixed << std::setprecision(7) << res_i + 1; + fp << " " << std::fixed << std::setprecision(7) << j + 1; + fp << " " << std::fixed << std::setprecision(7) << res_j + 1; + fp << " " << std::fixed << std::setprecision(7) << resmat_cross_d_[cross_index[i][j]][res_i][res_j]; + fp << " " << std::fixed << std::setprecision(7) << resmat_cross_p_[cross_index[i][j]][res_i][res_j]; + fp << "\n"; + } + } + fp.close(); +} + +void f_write_inter_cross_res_block(const std::string &output_prefix, + std::size_t i, std::size_t j, const std::vector &num_res_per_molecule, + const std::vector> &cross_index, + const std::vector &residue_indeces_, + const std::vector>> &resmat_cross_p_, + const std::vector>>> &block_resmat_cross_std_, + const std::vector &block_sizes_ +) +{ + std::filesystem::path ffh = output_prefix + "inter_res_mol_" + std::to_string(i + 1) + "_" + std::to_string(j + 1) + ".blocks.dat"; + std::ofstream fp(ffh); + //header + fp <<"# mol_i res_i mol_j res_j p_ij {blocks_p_ij}"; + fp <<"\n#"; + for (std::size_t i_bk=0; i_bk < block_sizes_.size(); i_bk++) + { + fp << " " << std::fixed << std::setprecision(1) << block_sizes_[i_bk]; + } + fp << "\n"; + for ( std::size_t res_i = 0; res_i < num_res_per_molecule[i]; res_i++ ) + { + for ( std::size_t res_j = 0; res_j < num_res_per_molecule[j]; res_j++ ) + { + fp << std::fixed << std::setprecision(7) << i + 1; + fp << " " << std::fixed << std::setprecision(7) << res_i + 1; + fp << " " << std::fixed << std::setprecision(7) << j + 1; + fp << " " << std::fixed << std::setprecision(7) << res_j + 1; + fp << " " << std::fixed << std::setprecision(7) << resmat_cross_p_[cross_index[i][j]][res_i][res_j]; + // fp << " ; "; + for (std::size_t i_bk=0; i_bk < block_sizes_.size(); i_bk++) + { + fp << " " << std::fixed << std::setprecision(7) << block_resmat_cross_std_[cross_index[i][j]][i_bk][res_i][res_j]; + } + fp << "\n"; + } + } + fp.close(); +} + +void f_write_inter_same_res(const std::string &output_prefix, + std::size_t i, const std::vector &num_res_per_molecule, + //const std::vector> &mol_id_, + const std::vector &residue_indeces_, + const std::vector>> &resmat_same_d_, + const std::vector>> &resmat_same_p_ +) +{ + std::filesystem::path ffh = output_prefix + "inter_res_mol_" + std::to_string(i + 1) + "_" + std::to_string(i + 1) + ".dat"; + std::ofstream fp(ffh); + + for ( std::size_t res_i = 0; res_i < num_res_per_molecule[i]; res_i++ ) + { + for ( std::size_t res_j = 0; res_j < num_res_per_molecule[i]; res_j++ ) + { + fp << std::fixed << std::setprecision(7) << i + 1; + fp << " " << std::fixed << std::setprecision(7) << res_i + 1; + fp << " " << std::fixed << std::setprecision(7) << i + 1; + fp << " " << std::fixed << std::setprecision(7) << res_j + 1; + fp << " " << std::fixed << std::setprecision(7) << resmat_same_d_[i][res_i][res_j]; + fp << " " << std::fixed << std::setprecision(7) << resmat_same_p_[i][res_i][res_j]; + fp << "\n"; + } + } + fp.close(); +} + + +void f_write_inter_same_res_block(const std::string &output_prefix, + std::size_t i, const std::vector &num_res_per_molecule, + const std::vector &residue_indeces_, + const std::vector>> &resmat_same_p_, + const std::vector>>> &block_resmat_same_std_, + const std::vector &block_sizes_ +) +{ + std::filesystem::path ffh = output_prefix + "inter_res_mol_" + std::to_string(i + 1) + "_" + std::to_string(i + 1) + ".blocks.dat"; + std::ofstream fp(ffh); + //header + fp <<"# mol_i res_i mol_j res_j p_ij {blocks_p_ij}"; + fp <<"\n#"; + for (std::size_t i_bk=0; i_bk < block_sizes_.size(); i_bk++) + { + fp << " " << std::fixed << std::setprecision(1) << block_sizes_[i_bk]; + } + fp << "\n"; + + for ( std::size_t res_i = 0; res_i < num_res_per_molecule[i]; res_i++ ) + { + for ( std::size_t res_j = 0; res_j < num_res_per_molecule[i]; res_j++ ) + { + fp << std::fixed << std::setprecision(7) << i + 1; + fp << " " << std::fixed << std::setprecision(7) << res_i + 1; + fp << " " << std::fixed << std::setprecision(7) << i + 1; + fp << " " << std::fixed << std::setprecision(7) << res_j + 1; + fp << " " << std::fixed << std::setprecision(7) << resmat_same_p_[i][res_i][res_j]; + // fp << " ; "; + for (std::size_t i_bk=0; i_bk < block_sizes_.size(); i_bk++) + { + fp << " " << std::fixed << std::setprecision(7) << block_resmat_same_std_[i][i_bk][res_i][res_j]; + } + fp << "\n"; + } + } + fp.close(); +} + +void f_write_intra_res(const std::string &output_prefix, + std::size_t i, const std::vector &num_res_per_molecule, + //const std::vector> &mol_id_, + const std::vector &residue_indeces_, + const std::vector>> &resmat_same_d_, + const std::vector>> &resmat_same_p_ +) +{ + std::filesystem::path ffh = output_prefix + "intra_res_mol_" + std::to_string(i + 1) + "_" + std::to_string(i + 1) + ".dat"; + std::ofstream fp(ffh); + + for ( std::size_t res_i = 0; res_i < num_res_per_molecule[i]; res_i++ ) + { + for ( std::size_t res_j = 0; res_j < num_res_per_molecule[i]; res_j++ ) + { + fp << std::fixed << std::setprecision(7) << i + 1; + fp << " " << std::fixed << std::setprecision(7) << res_i + 1; + fp << " " << std::fixed << std::setprecision(7) << i + 1; + fp << " " << std::fixed << std::setprecision(7) << res_j + 1; + fp << " " << std::fixed << std::setprecision(7) << resmat_same_d_[i][res_i][res_j]; + fp << " " << std::fixed << std::setprecision(7) << resmat_same_p_[i][res_i][res_j]; + fp << "\n"; + } + } + fp.close(); +} + +void f_write_intra_res_block(const std::string &output_prefix, + std::size_t i, const std::vector &num_res_per_molecule, + const std::vector &residue_indeces_, + const std::vector>> &resmat_intra_p_, + const std::vector>>> &block_resmat_intra_std_, + const std::vector &block_sizes_ +) +{ + std::filesystem::path ffh = output_prefix + "intra_res_mol_" + std::to_string(i + 1) + "_" + std::to_string(i + 1) + ".blocks.dat"; + std::ofstream fp(ffh); + //header + fp <<"# mol_i res_i mol_j res_j p_ij {blocks_p_ij}"; + fp <<"\n#"; + for (std::size_t i_bk=0; i_bk < block_sizes_.size(); i_bk++) + { + fp << " " << std::fixed << std::setprecision(1) << block_sizes_[i_bk]; + } + fp << "\n"; + + for ( std::size_t res_i = 0; res_i < num_res_per_molecule[i]; res_i++ ) + { + for ( std::size_t res_j = 0; res_j < num_res_per_molecule[i]; res_j++ ) + { + fp << std::fixed << std::setprecision(7) << i + 1; + fp << " " << std::fixed << std::setprecision(7) << res_i + 1; + fp << " " << std::fixed << std::setprecision(7) << i + 1; + fp << " " << std::fixed << std::setprecision(7) << res_j + 1; + fp << " " << std::fixed << std::setprecision(7) << resmat_intra_p_[i][res_i][res_j]; + // fp << " ; "; + for (std::size_t i_bk=0; i_bk < block_sizes_.size(); i_bk++) + { + fp << " " << std::fixed << std::setprecision(7) << block_resmat_intra_std_[i][i_bk][res_i][res_j]; + } + fp << "\n"; + } + } + fp.close(); +} + +std::vector read_selection( const std::string &path, const std::string &selection_name ) +{ + bool found = false, finished = false; + std::ifstream infile(path); + if (!infile.good()) + { + std::string errorMessage = "Cannot find the indicated selection file"; + throw std::runtime_error(errorMessage.c_str()); + } + std::vector sel; + + std::string line; + while ( std::getline(infile, line) ) + { + std::string value; + std::istringstream iss(line); + if (line == "") continue; + + // find if regex matches the line (regex is no semicolon followed by 0 or more spaces followed by [ and 0 or more spaces followed by selection_name followed by 0 or more spaces followed by ]) + std::regex re_found("([^;]+)\\s*\\[\\s*" + selection_name + "\\s*\\]"); + // find if regex matches the line (same as above but any selection name) + std::regex re_finished("([^;]+)\\s*\\[\\s*.*\\s*\\]"); + if (std::regex_search(line, re_finished) && found) finished = true; + if (std::regex_search(line, re_found)) found = true; + if (found && !finished) + { + // check if value is a number + while (iss >> value) + { + try + { + std::stoi(value); + } + catch (const std::invalid_argument& e) + { + std::cerr << "Error: " << e.what() << " in line " << line << std::endl; + std::cerr << "The value " << value << " found in " << path << " is not a number" << std::endl; + } + sel.push_back(std::stoi(value)); + } + } + } + + return sel; +} + +/** + * @brief Print a progress bar to the standard output + * + * Taken from https://stackoverflow.com/a/36315819 + * + * @param percentage + */ +void print_progress_bar(float percentage) +{ + constexpr std::size_t PROGRESS_BAR_LENGTH = 60; + constexpr char PROGRESS_BAR[] = "############################################################"; + + int val = (int) (percentage * 100); + int lpad = (int) (percentage * PROGRESS_BAR_LENGTH); + int rpad = PROGRESS_BAR_LENGTH - lpad; + + printf("\r%3d%% [%.*s%*s]", val, lpad, PROGRESS_BAR, rpad, ""); + fflush(stdout); +} + +} // namespace resdata::io + +#endif // _CMDATA_IO_HPP + + +void DefineResidueIndexing(int* index_, const gmx_mtop_t *mtop_, + std::vector &num_mol_unique_,std::vector &natmol2_, + const std::vector &num_mol_unique_temp,const std::vector &natmol2_temp, + std::vector &num_res_per_molecule,std::vector &residue_indeces_ ) +{ + // CREATE VECTOR of RESIDUE indeces to map atom to residue + int res_ii_appo = 0; + int molb=0; + int molb_res=0; + const char * atomname_appo; + const char * residuename_appo; + + // for (int i = 0; i < natmol2_.size(); i++) + // std::cout << natmol2_[i]*num_mol_unique_[i] << " "; + int count=0; + mtopGetAtomAndResidueName(*mtop_, index_[0], &molb, &atomname_appo, &res_ii_appo, &residuename_appo, &molb_res); + int prevres = res_ii_appo; + int rescount = 1, i_mol=1; + + + // RESIDUE FINDING + for (int i_mol_type = 0; i_mol_type < num_mol_unique_.size(); i_mol_type++) + { + if(i_mol_type>0) + { + count += num_mol_unique_[i_mol_type-1]*natmol2_[i_mol_type-1]; + } + for (int i = 0; i < num_mol_unique_[i_mol_type]*natmol2_[i_mol_type]; i++) + { + mtopGetAtomAndResidueName(*mtop_, index_[i+count], &molb, &atomname_appo, &res_ii_appo, &residuename_appo, &molb_res); + // std::cout< num res:%i\n", i_mol_type, rescount); + num_res_per_molecule.push_back(rescount); + + rescount = 0; + i_mol=1; + } + + count=0; + mtopGetAtomAndResidueName(*mtop_, 0, &molb, &atomname_appo, &res_ii_appo, &residuename_appo, &molb_res); + prevres = res_ii_appo; + rescount = 1; + i_mol=1; + + for (int i_mol_type = 0; i_mol_type < num_mol_unique_temp.size(); i_mol_type++) + { + if(i_mol_type>0) + { + count += num_mol_unique_temp[i_mol_type-1]*natmol2_temp[i_mol_type-1]; + } + for (int i = 0; i < num_mol_unique_temp[i_mol_type]*natmol2_temp[i_mol_type]; i++) + { + mtopGetAtomAndResidueName(*mtop_, i+count, &molb, &atomname_appo, &res_ii_appo, &residuename_appo, &molb_res); + + if(res_ii_appo != prevres) + { + if(i==i_mol*natmol2_temp[i_mol_type]) + { + i_mol+=1; + rescount = 1; + prevres = res_ii_appo; + } + else{ + rescount+=1; + prevres = res_ii_appo; + } + } + residue_indeces_.push_back(rescount); + } + + rescount = 0; + i_mol=1; + } +} + diff --git a/tools/resdata/src/resdata/parallel.hpp b/tools/resdata/src/resdata/parallel.hpp new file mode 100644 index 00000000..dccb9f70 --- /dev/null +++ b/tools/resdata/src/resdata/parallel.hpp @@ -0,0 +1,43 @@ +#ifndef _RESDATA_SEMAPHORE_HPP +#define _RESDATA_SEMAPHORE_HPP + +#include +#include + +namespace resdata::parallel +{ + +class Semaphore +{ +private: + std::mutex mut; + std::condition_variable cv; + std::uint64_t counter; + +public: + Semaphore( std::uint64_t counter = 0 ) : mut(std::mutex()), cv(std::condition_variable()), counter(counter) {} + void acquire() + { + std::unique_lock lock(mut); + cv.wait(lock, [this](){ return counter > 0; }); + --counter; + lock.unlock(); + } + + void release() + { + std::unique_lock lock(mut); + ++counter; + lock.unlock(); + cv.notify_one(); + } + + void set_counter( std::uint64_t c ) + { + counter = c; + } +}; + +} // namespace resdata::parallel + +#endif // _RESDATA_SEMAPHORE_HPP \ No newline at end of file diff --git a/tools/resdata/src/resdata/resdata.hpp b/tools/resdata/src/resdata/resdata.hpp new file mode 100644 index 00000000..2f91260a --- /dev/null +++ b/tools/resdata/src/resdata/resdata.hpp @@ -0,0 +1,887 @@ +#ifndef _RESDATA_RESDATA_HPP +#define _RESDATA_RESDATA_HPP + +// gromacs includes +#include +#include +#include +#include +#include +#include + +// resdata includes +#include "io.hpp" +#include "indexing.hpp" +#include "parallel.hpp" +#include "density.hpp" +#include "block_avg.hpp" +//#include "mindist.hpp" +#include "xtc_frame.hpp" +#include "function_types.hpp" + +// standard library imports +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +// xdrfile includes +#include +#include + +namespace resdata +{ + +class RESData +{ +private: + // general fields + int n_x_; + float dt_, t_begin_, t_end_; + int nskip_; + gmx_mtop_t *mtop_; + rvec *xcm_ = nullptr; + double cutoff_, mol_cutoff_, mcut2_, cut_sig_2_; + + // molecule number fields + int nindex_pre_ndx; + int nindex_=0; + // std::vector selection_; + gmx::RangePartitioning mols_pre_ndx; + gmx::RangePartitioning mols_; + std::vector natmol2_; + std::vector mol_id_; + std::vector num_mol_unique_; + bool use_index_ = false; + // weights fields + double weights_sum_; + std::string weights_path_; + std::vector weights_; + + std::string traj_path_; + // pbc fields + bool no_pbc_; + t_pbc *pbc_; + PbcType pbcType_; + + // frame fields + resdata::xtc::Frame *frame_; + XDRFILE *trj_; + + //define the resmat matrix mol_i_type, mol_i, res_i, res_j for probability and distance + using resmat_matrix = std::vector>>; + using resmat_matrix_temp = std::vector>>>; + std::vector residue_indeces_; + resmat_matrix resmat_same_p_; + resmat_matrix resmat_same_d_; + resmat_matrix resmat_intra_p_; + resmat_matrix resmat_intra_d_; + resmat_matrix resmat_cross_p_; + resmat_matrix resmat_cross_d_; + std::vector> cross_index_; + std::vector num_res_per_molecule; + + resmat_matrix_temp appo_cross_d; + resmat_matrix_temp appo_cross_p; + resmat_matrix_temp appo_same_d; + resmat_matrix_temp appo_same_p; + resmat_matrix_temp appo_intra_d; + resmat_matrix_temp appo_intra_p; + + //BLOCK AVERAGE + std::vector block_sizes_; + using block_resmat_matrix = std::vector>>>; + block_resmat_matrix block_resmat_intra_p_; + block_resmat_matrix block_resmat_intra_p2_; + block_resmat_matrix block_resmat_intra_p_temp; + block_resmat_matrix block_resmat_intra_std_; + std::vector>>> block_counter_intra; + block_resmat_matrix block_resmat_same_p_; + block_resmat_matrix block_resmat_same_p2_; + block_resmat_matrix block_resmat_same_p_temp; + block_resmat_matrix block_resmat_same_std_; + std::vector>>> block_counter_same; + block_resmat_matrix block_resmat_cross_p_; + block_resmat_matrix block_resmat_cross_p2_; + block_resmat_matrix block_resmat_cross_p_temp; + block_resmat_matrix block_resmat_cross_std_; + std::vector>>> block_counter_cross; + + bool blk_avg_; + + t_topology top_ress_; + rvec* x; + int natoms_res; + int isize_; + int* index_; + + // temporary containers for maxcdf operations + std::vector> frame_same_mutex_; + std::vector> frame_cross_mutex_; + + // parallelization fields + int num_threads_; + int num_mol_threads_; + std::vector threads_; + std::vector mol_threads_; + resdata::parallel::Semaphore semaphore_; + + // mode selection, booleans and functions + std::string mode_; + bool intra_ = false, same_ = false, cross_ = false; + + static void molecule_routine( + const int i, const int nindex_, t_pbc *pbc, rvec *x, + const double cut_sig_2_, + const std::vector &natmol2_, const std::vector &num_mol_unique_, const std::vector &mol_id_, + const std::vector> &cross_index_, + const double mcut2_, rvec *xcm_, const gmx::RangePartitioning &mols_, gmx_mtop_t *mtop_, + std::vector> &frame_cross_mutex_, + std::vector> &frame_same_mutex_, + resdata::parallel::Semaphore &semaphore_, double weight,const std::vector &residue_indeces_, + resmat_matrix &resmat_same_d_, resmat_matrix &resmat_same_p_, + resmat_matrix_temp &appo_same_d, resmat_matrix_temp &appo_same_p, + resmat_matrix &resmat_intra_d_,resmat_matrix &resmat_intra_p_, + resmat_matrix_temp &appo_intra_d, resmat_matrix_temp &appo_intra_p, + resmat_matrix &resmat_cross_d_,resmat_matrix &resmat_cross_p_, + resmat_matrix_temp &appo_cross_d, resmat_matrix_temp &appo_cross_p, + const bool same, const bool intra, const bool cross, + const int* index, + const std::vector &num_res_per_molecule + ) + { + semaphore_.acquire(); + const char * atomname; + int tmp_i = 0; + std::size_t mol_i = i, mol_j = 0; + while ( static_cast(mol_i) - num_mol_unique_[tmp_i] >= 0 ) + { + mol_i -= num_mol_unique_[tmp_i]; + tmp_i++; + if (tmp_i == num_mol_unique_.size()) + break; + } + if (mol_i == num_mol_unique_[mol_id_[i]]) mol_i = 0; + + int molb = 0; + /* Loop over molecules */ + for (int j = 0; j < nindex_; j++) + { + if((same || intra) && (!cross)){ + if(mol_id_[i]!=mol_id_[j]) continue; + } + if((!same && !intra) && cross){ + if(mol_id_[i]==mol_id_[j]){ + continue; + } + } + if((!same && !intra && !cross)){ + continue; + } + if (j!=0) + if (mol_j == num_mol_unique_[mol_id_[j-1]]) mol_j = 0; + /* intermolecular interactions are evaluated only among neighbour molecules */ + if (i!=j) + { + rvec dx; + if (pbc != nullptr) pbc_dx(pbc, xcm_[i], xcm_[j], dx); + else rvec_sub(xcm_[i], xcm_[j], dx); + double dx2 = iprod(dx, dx); + if (dx2 > mcut2_) continue; + } + /* for molecules of different specie we fill half a matrix */ + if (mol_id_[i] != mol_id_[j] && j < i) continue; + std::size_t a_i = 0; + // GMX_RELEASE_ASSERT(mols_.numBlocks() > 0, "Cannot access index[] from empty mols"); + + /* cycle over the atoms of a molecule i */ + for (std::size_t iii = mols_.block(i).begin(); iii < mols_.block(i).end(); iii++) + { + int ii = index[static_cast(iii)]; + std::size_t a_j = 0; + mtopGetAtomAndResidueName(*mtop_, ii, &molb, &atomname, nullptr, nullptr, nullptr); + if (atomname[0] == 'H') + { + a_i++; + continue; + } + /* cycle over the atoms of a molecule j */ + for (std::size_t jjj = mols_.block(j).begin(); jjj < mols_.block(j).end(); jjj++) + { + int jj = index[static_cast(jjj)]; + mtopGetAtomAndResidueName(*mtop_, jj, &molb, &atomname, nullptr, nullptr, nullptr); + if (atomname[0] == 'H') + { + a_j++; + continue; + } + std::size_t delta = iii-jjj-(i-j)*natmol2_[mol_id_[i]]; + rvec sym_dx; + if (pbc != nullptr) pbc_dx(pbc, x[ii], x[jj], sym_dx); + else rvec_sub(x[ii], x[jj], sym_dx); + double dx2 = iprod(sym_dx, sym_dx); + if (i==j) + { + if(intra) + { + if (dx2 < cut_sig_2_) + { // intra molecule species + std::size_t same_mutex_index = resdata::indexing::mutex_access(mol_id_[i], residue_indeces_[ii]-1, residue_indeces_[jj]-1, num_res_per_molecule); + std::unique_lock lock(frame_same_mutex_[mol_id_[i]][same_mutex_index]); + appo_intra_d[mol_id_[i]][mol_i][residue_indeces_[ii]-1][residue_indeces_[jj]-1] = std::min( appo_intra_d[mol_id_[i]][mol_i][residue_indeces_[ii]-1][residue_indeces_[jj]-1],std::sqrt(dx2)); + appo_intra_p[mol_id_[i]][mol_i][residue_indeces_[ii]-1][residue_indeces_[jj]-1] = 1.; + lock.unlock(); + } + } + } + else + { + if (mol_id_[i]==mol_id_[j]) + { // inter same molecule specie + if(same) + { + if (dx2 < cut_sig_2_) + { + std::size_t same_mutex_index = resdata::indexing::mutex_access(mol_id_[i], residue_indeces_[ii]-1, residue_indeces_[jj]-1, num_res_per_molecule); + std::unique_lock lock(frame_same_mutex_[mol_id_[i]][same_mutex_index]); + appo_same_d[mol_id_[i]][mol_i][residue_indeces_[ii]-1][residue_indeces_[jj]-1] = std::min( appo_same_d[mol_id_[i]][mol_i][residue_indeces_[ii]-1][residue_indeces_[jj]-1],std::sqrt(dx2)); + appo_same_p[mol_id_[i]][mol_i][residue_indeces_[ii]-1][residue_indeces_[jj]-1] = 1.; + lock.unlock(); + } + if (delta!=0.) { + // this is to account for inversion atom/molecule + int ii_delta =index[iii-delta]; + int jj_delta =index[jjj+delta]; + + if (pbc != nullptr) pbc_dx(pbc, x[ii_delta], x[jj_delta], sym_dx); + else rvec_sub(x[ii_delta], x[jj_delta], sym_dx); + dx2 = iprod(sym_dx, sym_dx); + if (dx2 < cut_sig_2_) + { + std::size_t same_mutex_index = resdata::indexing::mutex_access(mol_id_[i], residue_indeces_[ii]-1, residue_indeces_[jj]-1, num_res_per_molecule); + std::unique_lock lock(frame_same_mutex_[mol_id_[i]][same_mutex_index]); + appo_same_d[mol_id_[i]][mol_i][residue_indeces_[jj_delta]-1][residue_indeces_[ii_delta]-1] = std::min( appo_same_d[mol_id_[i]][mol_i][residue_indeces_[jj_delta]-1][residue_indeces_[ii_delta]-1],std::sqrt(dx2)); + appo_same_p[mol_id_[i]][mol_i][residue_indeces_[jj_delta]-1][residue_indeces_[ii_delta]-1] = 1.; + lock.unlock(); + } + } + } + } + else + { // inter cross molecule species + if (dx2 < cut_sig_2_ && cross) + { + std::size_t cross_mutex_index = resdata::indexing::mutex_access(mol_id_[j], residue_indeces_[ii]-1, residue_indeces_[jj]-1, num_res_per_molecule); + std::unique_lock lock(frame_cross_mutex_[cross_index_[mol_id_[i]][mol_id_[j]]][cross_mutex_index]); + appo_cross_d[cross_index_[mol_id_[i]][mol_id_[j]]][mol_i][residue_indeces_[ii]-1][residue_indeces_[jj]-1] = std::min( appo_cross_d[cross_index_[mol_id_[i]][mol_id_[j]]][mol_i][residue_indeces_[ii]-1][residue_indeces_[jj]-1],std::sqrt(dx2)); + appo_cross_p[cross_index_[mol_id_[i]][mol_id_[j]]][mol_i][residue_indeces_[ii]-1][residue_indeces_[jj]-1] = 1.; + lock.unlock(); + } + } + } + ++a_j; + } + ++a_i; + } + ++mol_j; + } + semaphore_.release(); + } + +public: + RESData( + const std::string &top_path, const std::string &traj_path, + double cutoff, double mol_cutoff, int nskip, int num_threads, int num_mol_threads, + int dt, const std::string &mode, const std::string &weights_path, + bool no_pbc, float t_begin, float t_end, const std::string &index_path, bool blk_avg + ) : cutoff_(cutoff), mol_cutoff_(mol_cutoff), nskip_(nskip), num_threads_(num_threads), num_mol_threads_(num_mol_threads), + mode_(mode), weights_path_(weights_path), + no_pbc_(no_pbc), dt_(dt), t_begin_(t_begin), t_end_(t_end), blk_avg_(blk_avg) + { + bool bTop_; + matrix boxtop_; + mtop_ = (gmx_mtop_t*)malloc(sizeof(gmx_mtop_t)); + TpxFileHeader header = readTpxHeader(top_path.c_str(), true); + int natoms; + pbcType_ = read_tpx(top_path.c_str(), nullptr, boxtop_, &natoms, nullptr, nullptr, mtop_); + + if (no_pbc_) + { + pbc_ = nullptr; + } + else + { + pbc_ = (t_pbc*)malloc(sizeof(t_pbc)); + set_pbc(pbc_, pbcType_, boxtop_); + } + + traj_path_ = traj_path; + int natom; + long unsigned int nframe; + int64_t *offsets; + + PbcType pbcType; + matrix box = { { 0 } }; + //read_tps_conf(ftp2fn(efTPS, NFILE, fnm), &top_ress, &pbcType, &x, nullptr, box, FALSE); + read_tps_conf(top_path.c_str(), &top_ress_, &pbcType, &x, nullptr, box, FALSE); + + frame_ = (resdata::xtc::Frame*)malloc(sizeof(resdata::xtc::Frame)); + std::cout << "Reading trajectory file " << traj_path << std::endl; + read_xtc_header(traj_path.c_str(), &natom, &nframe, &offsets); + *frame_ = resdata::xtc::Frame(natom); + frame_->nframe = nframe; + frame_->offsets = offsets; + + trj_ = xdrfile_open(traj_path.c_str(), "r"); + + //define index + if ( std::filesystem::exists(std::filesystem::path(index_path)) ) + { + char* grpname; + use_index_ = true; + get_index(&top_ress_.atoms, index_path.c_str(), 1, &isize_, &index_, &grpname); + } + else{ + index_ = new int[natom]; + for(int i=0; ix); + free(frame_->offsets); + free(frame_); + free(mtop_); + if (xcm_ != nullptr) free(xcm_); + } + + void initAnalysis() + /** + * @brief Initializes the analysis by setting up the molecule partitioning and the mode selection + * + * @todo Check if molecule block is empty + */ + { + n_x_ = 0; + + std::vector natmol2_pre_ndx; + std::vector mol_id_pre_ndx; + std::vector num_mol_unique_pre_ndx; + + // get the number of atoms per molecule + // equivalent to mols_ = gmx:gmx_mtop_molecules(*top.mtop()); + for (const gmx_molblock_t &molb : mtop_->molblock) + { + int natm_per_mol = mtop_->moltype[molb.type].atoms.nr; + for (int i = 0; i < molb.nmol; i++) mols_pre_ndx.appendBlock(natm_per_mol); + num_mol_unique_pre_ndx.push_back(molb.nmol); + } + + // number of molecules + nindex_pre_ndx = mols_pre_ndx.numBlocks(); + + printf("Evaluating mode selection:\n"); + std::string tmp_mode; + std::stringstream modestream{ mode_ }; + while (std::getline(modestream, tmp_mode, '+')) + { + if (tmp_mode == std::string("intra")) + { + intra_ = true; + } + else if (tmp_mode == std::string("same")) + { + same_ = true; + } + else if (tmp_mode == std::string("cross")) + { + cross_ = true; + } + else + { + printf("Wrong mode: %s\nMode must be one from: intra, same, cross. Use + to concatenate more than one, i.e. intra+cross\n", tmp_mode.c_str()); + exit(1); + } + printf(" - found %s\n", tmp_mode.c_str()); + } + + int mol_id = 0; + int n_mol_appo = 0, n_mol_counter=0,n_mol_true=0; + int molb_index = 0; + int natom_appo=0,natom_appo_2=0 ; + + //find the number of atom per molecule and the mol_id if there is no index + for ( auto i : num_mol_unique_pre_ndx ) + { + natmol2_pre_ndx.push_back(mols_pre_ndx.block(molb_index).end() - mols_pre_ndx.block(molb_index).begin()); + for ( int j = 0; j < i; j++ ) + { + mol_id_pre_ndx.push_back(mol_id); + } + mol_id++; + molb_index += i; + } + + // find the true number of atoms and mol id per molecule if index is present + printf("\nReading index file\n"); + mol_id = 0; + molb_index = 0; + + for ( int i=0; i0)num_mol_unique_.push_back(n_mol_true); + + n_mol_counter+=num_mol_unique_pre_ndx[mol_id] ; + mol_id++; + if(n_mol_true>0) + { + natmol2_.push_back(natom_appo_2); + } + n_mol_true = 0; + } + natom_appo=0; + //check if all atoms in mols_.block(i) are in the index file + for(int i_atom=mols_pre_ndx.block(i).begin(); i_atom 0) && (natom_appo != natmol2_pre_ndx[mol_id])) + { + printf("\nERROR: Number of atoms given by the index does not match the number of atoms of the corresponding molecule.\nIndex file should be used to remove hole molecules, otherwise do a post processing analysis\n\n"); + printf("Different number of atoms found in molecule %i (molecule type %i): number of atom selected in index: %i, number of atoms of molecule in topology: %i\n",i,mol_id, natom_appo, natmol2_pre_ndx[mol_id]); + printf("Exit Code\n"); + exit(2); + } + else{ + printf(" V mol: %i, mol id: %i, size pre ndx: %i, size after ndx: %i \n",i+1,mol_id, natmol2_pre_ndx[mol_id], natom_appo); + n_mol_true +=1; + nindex_++; + mol_id_.push_back(mol_id); + mols_.appendBlock(natom_appo); + natom_appo_2 = natom_appo; + } + + //molb_index += i; + } + if(n_mol_true>0){ + natmol2_.push_back(natom_appo_2); + num_mol_unique_.push_back(n_mol_true); + } + + // CREATE VECTOR of RESIDUE indeces to map atom to residue + printf("\nAssigning residue index to atoms\n"); + DefineResidueIndexing(index_, mtop_, num_mol_unique_, natmol2_, num_mol_unique_pre_ndx, natmol2_pre_ndx, num_res_per_molecule, residue_indeces_); + + printf("\nNumber of different molecules %lu\n", natmol2_.size()); + bool check_same = false; + for(std::size_t i=0; i1) check_same = true; + } + + if (num_threads_ > std::thread::hardware_concurrency()) + { + num_threads_ = std::thread::hardware_concurrency(); + std::cout << "Maximum thread number surpassed. Scaling num_threads down to " << num_threads_ << std::endl; + } + if (num_mol_threads_ > std::thread::hardware_concurrency()) + { + num_mol_threads_ = std::thread::hardware_concurrency(); + std::cout << "Maximum thread number surpassed. Scaling num_mol_threads down to " << num_mol_threads_ << std::endl; + } + if (num_mol_threads_ > nindex_) + { + num_mol_threads_ = nindex_; + std::cout << "Number of molecule threads surpassed number of molecules. Setting num_mol_threads to " << num_mol_threads_ << std::endl; + } + threads_.resize(num_threads_); + mol_threads_.resize(nindex_); + semaphore_.set_counter(num_mol_threads_); + std::cout << "Using " << num_threads_ << " threads and " << num_mol_threads_ << " molecule threads" << std::endl; + + + printf("\nActivating modes\n"); + if(!check_same && same_) same_ = false; + if(cross_ && natmol2_.size()<2) + { + cross_ = false; + printf(":: deactivating cross mode (only 1 type of molecule found)\n"); + } + + if(nindex_>1 && (same_ || cross_)) + { + printf("\n\n::::::::::::WARNING::::::::::::\nMore than 1 molcule found in the system.\nFix pbc before running resdata using pbc mol\n"); + printf(":::::::::::::::::::::::::::::::\n\n"); + } + + if (same_) + { + std::cout << ":: activating intermat same calculations" << std::endl; + resmat_same_d_.resize(natmol2_.size()); + resmat_same_p_.resize(natmol2_.size()); + appo_same_d.resize(natmol2_.size()); + appo_same_p.resize(natmol2_.size()); + if(blk_avg_) + { + block_resmat_same_p_.resize(natmol2_.size() ); + block_resmat_same_p2_.resize(natmol2_.size()); + block_resmat_same_p_temp.resize(natmol2_.size() ); + block_resmat_same_std_.resize(natmol2_.size() ); + block_counter_same.resize(natmol2_.size()); + } + } + if (cross_) + { + std::cout << ":: activating intermat cross calculations" << std::endl; + resmat_cross_d_.resize((natmol2_.size() * (natmol2_.size() - 1)) / 2); + resmat_cross_p_.resize((natmol2_.size() * (natmol2_.size() - 1)) / 2); + appo_cross_d.resize((natmol2_.size() * (natmol2_.size() - 1)) / 2); + appo_cross_p.resize((natmol2_.size() * (natmol2_.size() - 1)) / 2); + if(blk_avg_) + { + block_resmat_cross_p_.resize((natmol2_.size() * (natmol2_.size() - 1)) / 2); + block_resmat_cross_p2_.resize((natmol2_.size() * (natmol2_.size() - 1)) / 2); + block_resmat_cross_p_temp.resize((natmol2_.size() * (natmol2_.size() - 1)) / 2); + block_resmat_cross_std_.resize((natmol2_.size() * (natmol2_.size() - 1)) / 2); + block_counter_cross.resize((natmol2_.size() * (natmol2_.size() - 1)) / 2); + } + } + if (intra_) + { + std::cout << ":: activating intramat calculations" << std::endl; + resmat_intra_d_.resize(natmol2_.size()); + resmat_intra_p_.resize(natmol2_.size()); + appo_intra_d.resize(natmol2_.size()); + appo_intra_p.resize(natmol2_.size()); + if(blk_avg_) + { + block_resmat_intra_p_.resize(natmol2_.size() ); + block_resmat_intra_p2_.resize(natmol2_.size()); + block_resmat_intra_p_temp.resize(natmol2_.size() ); + block_resmat_intra_std_.resize(natmol2_.size() ); + block_counter_intra.resize(natmol2_.size()); + } + } + + if(blk_avg_) + { + int n_frames_blk = 0; + int frnr_appo = 0; + int natom_appo; + long unsigned int nframe_appo; + int64_t *offsets_appo; + + resdata::xtc::Frame *frame_appo; + XDRFILE *trj_appo; + + frame_appo = (resdata::xtc::Frame*)malloc(sizeof(resdata::xtc::Frame)); + read_xtc_header(traj_path_.c_str(), &natom_appo, &nframe_appo, &offsets_appo); + *frame_appo = resdata::xtc::Frame(natom_appo); + frame_appo->nframe = nframe_appo; + frame_appo->offsets = offsets_appo; + trj_appo = xdrfile_open(traj_path_.c_str(), "r"); + printf("\nDefining Block sizes\n"); + + while (frame_appo->read_next_frame(trj_appo, no_pbc_, pbcType_, pbc_) == exdrOK) + { + + if ((frame_appo->time >= t_begin_ && (t_end_ < 0 || frame_appo->time <= t_end_ )) && // within time borders + ( dt_ == 0 || std::fmod(frame_appo->time, dt_) == 0) && (nskip_ == 0 || std::fmod(frnr_appo, nskip_) == 0)) // skip frames + { + n_frames_blk++; + } + frnr_appo++; + } + + printf("\nActivating Block average analysis with block sizes (in frames)\n"); + int i_blk = 4; + while( i_blk(natmol2_.size(), 0)); + for ( std::size_t i = 0; i < natmol2_.size(); i++ ) + { + if (same_) + { + resmat_same_d_[i].resize(num_res_per_molecule[i], std::vector(num_res_per_molecule[i], 0.)); + resmat_same_p_[i].resize(num_res_per_molecule[i], std::vector(num_res_per_molecule[i], 0.)); + appo_same_d[i].resize(num_mol_unique_[i], std::vector>(num_res_per_molecule[i], std::vector(num_res_per_molecule[i], 100.))); + appo_same_p[i].resize(num_mol_unique_[i], std::vector>(num_res_per_molecule[i], std::vector(num_res_per_molecule[i], 0.))); + if(blk_avg_) + { + block_resmat_same_p_[i].resize(block_sizes_.size(), std::vector>(num_res_per_molecule[i], std::vector(num_res_per_molecule[i], 0.))); + block_resmat_same_p2_[i].resize(block_sizes_.size(), std::vector>(num_res_per_molecule[i], std::vector(num_res_per_molecule[i], 0.))); + block_resmat_same_p_temp[i].resize(block_sizes_.size(), std::vector>(num_res_per_molecule[i], std::vector(num_res_per_molecule[i], 0.))); + block_resmat_same_std_[i].resize(block_sizes_.size(), std::vector>(num_res_per_molecule[i], std::vector(num_res_per_molecule[i], 0.))); + block_counter_same[i].resize(block_sizes_.size(), std::vector>(num_res_per_molecule[i], std::vector(num_res_per_molecule[i], 0))); + } + } + if (intra_) + { + resmat_intra_d_[i].resize(num_res_per_molecule[i], std::vector(num_res_per_molecule[i], 0.)); + resmat_intra_p_[i].resize(num_res_per_molecule[i], std::vector(num_res_per_molecule[i], 0.)); + appo_intra_d[i].resize(num_mol_unique_[i],std::vector>(num_res_per_molecule[i], std::vector(num_res_per_molecule[i], 100.))); + appo_intra_p[i].resize(num_mol_unique_[i], std::vector>(num_res_per_molecule[i], std::vector(num_res_per_molecule[i], 0.))); + if(blk_avg_) + { + block_resmat_intra_p_[i].resize(block_sizes_.size(), std::vector>(num_res_per_molecule[i], std::vector(num_res_per_molecule[i], 0.))); + block_resmat_intra_p2_[i].resize(block_sizes_.size(), std::vector>(num_res_per_molecule[i], std::vector(num_res_per_molecule[i], 0.))); + block_resmat_intra_p_temp[i].resize(block_sizes_.size(), std::vector>(num_res_per_molecule[i], std::vector(num_res_per_molecule[i], 0.))); + block_resmat_intra_std_[i].resize(block_sizes_.size(), std::vector>(num_res_per_molecule[i], std::vector(num_res_per_molecule[i], 0.))); + block_counter_intra[i].resize(block_sizes_.size(), std::vector>(num_res_per_molecule[i], std::vector(num_res_per_molecule[i], 0))); + } + } + for ( std::size_t j = i + 1; j < natmol2_.size() && cross_; j++ ) + { + resmat_cross_d_[cross_count].resize(num_res_per_molecule[i], std::vector(num_res_per_molecule[j], 0.)); + resmat_cross_p_[cross_count].resize(num_res_per_molecule[i], std::vector(num_res_per_molecule[j], 0.)); + appo_cross_d[cross_count].resize(num_mol_unique_[i],std::vector>(num_res_per_molecule[i], std::vector(num_res_per_molecule[j], 0.))); + appo_cross_p[cross_count].resize(num_mol_unique_[i], std::vector>(num_res_per_molecule[i], std::vector(num_res_per_molecule[j], 0.))); + + if(blk_avg_) + { + block_resmat_cross_p_[cross_count].resize(block_sizes_.size(), std::vector>(num_res_per_molecule[i], std::vector(num_res_per_molecule[j], 0.))); + block_resmat_cross_p2_[cross_count].resize(block_sizes_.size(), std::vector>(num_res_per_molecule[i], std::vector(num_res_per_molecule[j], 0.))); + block_resmat_cross_p_temp[cross_count].resize(block_sizes_.size(), std::vector>(num_res_per_molecule[i], std::vector(num_res_per_molecule[j], 0.))); + block_resmat_cross_std_[cross_count].resize(block_sizes_.size(), std::vector>(num_res_per_molecule[i], std::vector(num_res_per_molecule[j], 0.))); + block_counter_cross[cross_count].resize(block_sizes_.size(), std::vector>(num_res_per_molecule[i], std::vector(num_res_per_molecule[j], 0))); + } + cross_index_[i][j] = cross_count; + cross_count++; + } + } + + mcut2_ = mol_cutoff_ * mol_cutoff_; + cut_sig_2_ = (cutoff_) * (cutoff_ ); + xcm_ = (rvec*)malloc(nindex_ * sizeof(rvec)); + + if (intra_ || same_) frame_same_mutex_.resize(natmol2_.size()); + if (cross_) frame_cross_mutex_.resize(cross_index_.size()); + std::size_t sum_cross_mol_sizes = 0; + for ( std::size_t i = 0; i < natmol2_.size(); i++ ) + { + if (intra_ || same_) frame_same_mutex_[i] = std::vector(num_res_per_molecule[i] * num_res_per_molecule[i]); + for ( std::size_t j = i+1; j < natmol2_.size() && cross_; j++ ) + { + frame_cross_mutex_[cross_index_[i][j]] = std::vector(num_res_per_molecule[i] * num_res_per_molecule[j]); + } + } + + if (weights_path_ != "") + { + printf("Weights file provided. Reading weights from %s\n", weights_path_.c_str()); + weights_ = resdata::io::read_weights_file(weights_path_); + printf("Found %li frame weights in file\n", weights_.size()); + double w_sum = std::accumulate(std::begin(weights_), std::end(weights_), 0.0, std::plus<>()); + printf("Sum of weights amounts to %lf\n", w_sum); + weights_sum_ = 0.; + } + + printf("Finished preprocessing. Starting frame-by-frame analysis.\n"); + } + + void run() + { + std::chrono::steady_clock::time_point begin;// = std::chrono::steady_clock::now(); + std::chrono::steady_clock::time_point end; //= std::chrono::steady_clock::now(); + std::cout << "Running frame-by-frame analysis" << std::endl; + int frnr = 0; + float progress = 0.0, new_progress = 0.0; + resdata::io::print_progress_bar(progress); + + while (frame_->read_next_frame(trj_, no_pbc_, pbcType_, pbc_) == exdrOK) + { + new_progress = static_cast(frnr) / static_cast(frame_->nframe); + if (new_progress - progress > 0.01) + { + progress = new_progress; + resdata::io::print_progress_bar(progress); + } + if ((frame_->time >= t_begin_ && (t_end_ < 0 || frame_->time <= t_end_ )) && // within time borders + ( dt_ == 0 || std::fmod(frame_->time, dt_) == 0) && (nskip_ == 0 || std::fmod(frnr, nskip_) == 0)) // skip frames + { + double weight = 1.0; + if (!weights_.empty()) + { + weight = weights_[frnr]; + weights_sum_ += weight; + } + // #pragma omp parallel for num_threads(std::min(num_threads_, nindex_)) + for (int i = 0; i < nindex_; i++) + { + clear_rvec(xcm_[i]); + double tm = 0.; + for (int iii = mols_.block(i).begin(); iii < mols_.block(i).end(); iii++) + { + int ii = index_[iii]; + for (int m = 0; (m < DIM); m++) + { + xcm_[i][m] += frame_->x[ii][m]; + } + tm += 1.0; + } + for (int m = 0; (m < DIM); m++) + { + xcm_[i][m] /= tm; + } + } + /* start loop for each molecule */ + begin = std::chrono::steady_clock::now(); + for (int i = 0; i < nindex_; i++) + { + /* start molecule thread*/ + mol_threads_[i] = std::thread( + molecule_routine, i, nindex_, pbc_, frame_->x, + cut_sig_2_, std::cref(natmol2_), std::cref(num_mol_unique_), std::cref(mol_id_), std::cref(cross_index_), + mcut2_, xcm_, mols_, mtop_, + std::ref(frame_cross_mutex_),std::ref(frame_same_mutex_), + std::ref(semaphore_), weight, residue_indeces_, + std::ref(resmat_same_d_), std::ref(resmat_same_p_),std::ref(appo_same_d), std::ref(appo_same_p), + std::ref(resmat_intra_d_), std::ref(resmat_intra_p_),std::ref(appo_intra_d), std::ref(appo_intra_p), + std::ref(resmat_cross_d_), std::ref(resmat_cross_p_),std::ref(appo_cross_d), std::ref(appo_cross_p), + same_, intra_, cross_, index_, std::cref(num_res_per_molecule) + ); + /* end molecule thread */ + } + for ( auto &thread : mol_threads_ ) thread.join(); + end = std::chrono::steady_clock::now(); + + begin = std::chrono::steady_clock::now(); + for(int i = 0; i< natmol2_.size(); i++) + { + if(intra_) + { + if(blk_avg_) resdata::blockavg::AccumulateIntraBlock(i,n_x_ ,std::cref(mol_id_), std::cref(natmol2_), std::ref(block_resmat_intra_p_), std::ref(block_resmat_intra_p2_), std::ref(block_resmat_intra_p_temp), std::ref(block_counter_intra),std::cref(block_sizes_), std::ref(appo_intra_p), std::cref(num_mol_unique_)); + resdata::density::AccumulateIntra(i, std::cref(mol_id_), std::ref(resmat_intra_d_), std::ref(resmat_intra_p_), std::ref(appo_intra_d), std::ref(appo_intra_p), std::cref(num_mol_unique_)); + } + if(same_) + { + if(blk_avg_) resdata::blockavg::AccumulateInterSameBlock(i,n_x_ ,std::cref(mol_id_), std::cref(natmol2_), std::ref(block_resmat_same_p_), std::ref(block_resmat_same_p2_), std::ref(block_resmat_same_p_temp), std::ref(block_counter_same),std::cref(block_sizes_), std::ref(appo_same_p), std::cref(num_mol_unique_)); + resdata::density::AccumulateInterSame(i, std::cref(mol_id_), std::ref(resmat_same_d_), std::ref(resmat_same_p_), std::ref(appo_same_d), std::ref(appo_same_p), std::cref(num_mol_unique_)); + } + if(cross_){ + if(blk_avg_) resdata::blockavg::AccumulateInterCrossBlock(i,n_x_ ,std::cref(mol_id_),std::cref(cross_index_), std::cref(natmol2_), std::ref(block_resmat_cross_p_), std::ref(block_resmat_cross_p2_), std::ref(block_resmat_cross_p_temp), std::ref(block_counter_cross),std::cref(block_sizes_), std::ref(appo_cross_p), std::cref(num_mol_unique_)); + resdata::density::AccumulateInterCross(i, std::cref(mol_id_),std::cref(cross_index_), std::cref(natmol2_), std::ref(resmat_cross_d_), std::ref(resmat_cross_p_), std::ref(appo_cross_d), std::ref(appo_cross_p),std::cref(num_mol_unique_)); + } + } + end = std::chrono::steady_clock::now(); + ++n_x_; + } + ++frnr; + } + + resdata::io::print_progress_bar(1.0); + std::cout<<"number of frames: "<; + using ftype_write_inter_same_res = resdata::ftypes::function_traits; + using ftype_write_inter_cross_res = resdata::ftypes::function_traits; + using ftype_write_inter_cross_res_block = resdata::ftypes::function_traits; + using ftype_write_inter_same_res_block = resdata::ftypes::function_traits; + using ftype_write_intra_res_block = resdata::ftypes::function_traits; + std::function write_intra_res = resdata::ftypes::do_nothing(); + std::function write_inter_same_res = resdata::ftypes::do_nothing(); + std::function write_inter_cross_res = resdata::ftypes::do_nothing(); + std::function write_inter_cross_res_block = resdata::ftypes::do_nothing(); + std::function write_inter_same_res_block = resdata::ftypes::do_nothing(); + std::function write_intra_res_block = resdata::ftypes::do_nothing(); + if (intra_) write_intra_res = resdata::io::f_write_intra_res; + if (same_) write_inter_same_res = resdata::io::f_write_inter_same_res; + if (cross_) write_inter_cross_res = resdata::io::f_write_inter_cross_res; + if (cross_ && blk_avg_) write_inter_cross_res_block = resdata::io::f_write_inter_cross_res_block; + if (same_ && blk_avg_) write_inter_same_res_block = resdata::io::f_write_inter_same_res_block; + if (intra_ && blk_avg_) write_intra_res_block = resdata::io::f_write_intra_res_block; + + for (std::size_t i = 0; i < natmol2_.size(); i++) + { + std::cout << "Writing data for molecule " << i << "..." << std::endl; + resdata::io::print_progress_bar(0.0); + float progress = 0.0, new_progress = 0.0; + + if(same_) + { + write_inter_same_res(output_prefix, i, num_res_per_molecule,residue_indeces_, resmat_same_d_, resmat_same_p_); + if(blk_avg_)write_inter_same_res_block(output_prefix, i, num_res_per_molecule,residue_indeces_, resmat_same_p_, block_resmat_same_std_, block_sizes_); + } + if(intra_) + { + write_intra_res(output_prefix, i, num_res_per_molecule, residue_indeces_, resmat_intra_d_, resmat_intra_p_); + if(blk_avg_)write_intra_res_block(output_prefix, i, num_res_per_molecule,residue_indeces_, resmat_intra_p_, block_resmat_intra_std_, block_sizes_); + } + for (std::size_t j = i + 1; j < natmol2_.size(); j++) + { + if(cross_) + { + write_inter_cross_res(output_prefix, i, j, num_res_per_molecule, cross_index_,residue_indeces_, resmat_cross_d_, resmat_cross_p_); + if(blk_avg_) + { + printf("%i\n", block_sizes_.size()); + write_inter_cross_res_block(output_prefix, i, j, num_res_per_molecule, cross_index_,residue_indeces_,resmat_cross_p_, block_resmat_cross_std_, block_sizes_); + } + } + } + } + resdata::io::print_progress_bar(1.0); + std::cout << "\nFinished!" << std::endl; + } +}; + + +} // namespace resdata + +#endif // _RESDATA_HPP diff --git a/tools/resdata/src/resdata/xtc_frame.hpp b/tools/resdata/src/resdata/xtc_frame.hpp new file mode 100644 index 00000000..1d2c42ef --- /dev/null +++ b/tools/resdata/src/resdata/xtc_frame.hpp @@ -0,0 +1,45 @@ +#ifndef _XTC_FRAME_HPP +#define _XTC_FRAME_HPP + +#include "gromacs/math/vec.h" +#include "gromacs/math/vectypes.h" +#include + +#include + +namespace resdata::xtc { + +class Frame { +public: + int natom; + unsigned long nframe; + int64_t *offsets; + int step; + float time; + matrix box; + rvec *x; + float prec; + + // create default constructor + Frame() { x = (rvec*)malloc(0 * sizeof(rvec)); } + Frame(int natom) : natom(natom) { x = (rvec*)malloc(natom * sizeof(rvec)); } + // ~Frame() { free(x); free(offsets); } + + int read_next_frame(XDRFILE *xd, bool nopbc, PbcType pbc_type, t_pbc *pbc) + { + int status = read_xtc(xd, natom, &step, &time, box, x, &prec); + if (nopbc) + { + pbc = nullptr; + } + else + { + set_pbc(pbc, pbc_type, box); + } + return status; + } +}; + +} // namespace resdata::xtc + +#endif // _XTC_FRAME_HPP \ No newline at end of file