Skip to content

Commit

Permalink
Merge pull request #169 from seoklab/jnooree/issue-162
Browse files Browse the repository at this point in the history
feat(fmt/pdb): implement pdb parser
  • Loading branch information
jnooree authored Feb 14, 2024
2 parents bf98b98 + 7a7125d commit 454f8fb
Show file tree
Hide file tree
Showing 32 changed files with 7,559 additions and 803 deletions.
4 changes: 4 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,14 @@ repos:
- id: check-symlinks
- id: destroyed-symlinks
- id: end-of-file-fixer
exclude: ^test/test_data/
- id: fix-byte-order-marker
exclude: ^test/test_data/
- id: trailing-whitespace
exclude: ^test/test_data/
args: ["--markdown-linebreak-ext=md"]
- id: mixed-line-ending
exclude: ^test/test_data/
args: ["--fix=lf"]
- repo: https://github.com/seoklab/shared-hooks
rev: v0.1.1
Expand Down
17 changes: 15 additions & 2 deletions include/nuri/algo/guess.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,21 @@ constexpr inline double kDefaultThreshold = 0.5;
* If connectivity information is already present and is correct, consider using
* guess_all_types().
*/
extern bool guess_bonds(MoleculeMutator &mut, int conf = 0,
double threshold = kDefaultThreshold);
extern bool guess_everything(MoleculeMutator &mut, int conf = 0,
double threshold = kDefaultThreshold);

/**
* @brief Guess connectivity information of a molecule.
* @param mut The mutator of the molecule to be guessed.
* @param conf The index of the conformation used for guessing. Defaults to 0.
* @param threshold The threshold for guessing bonds. Defaults to
*        kDefaultThreshold.
* @return true if the guessing is successful.
*
* This function assumes all connectivity information is missing. Any
* connectivity information already present in the molecule may be overwritten
* by this function.
*/
extern bool guess_connectivity(MoleculeMutator &mut, int conf = 0,
double threshold = kDefaultThreshold);

/**
* @brief Guess types of atoms and bonds, and number of hydrogens of a molecule.
Expand Down
151 changes: 143 additions & 8 deletions include/nuri/algo/rings.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ using Rings = std::vector<std::vector<int>>;
/**
* @brief Find all elementary cycles in the molecular graph.
* @param mol A molecule.
* @param max_size Maximum size of the rings to be found. If negative, all
* rings are found.
* @return A pair of (all elementary cycles, success). If success is `false`,
* the vector is in an unspecified state. This will fail if and only if
* any atom is a member of more than 100 elementary cycles.
Expand All @@ -29,9 +31,51 @@ using Rings = std::vector<std::vector<int>>;
* expected to run in a reasonable time (\f$\sim\mathcal{O}(V^2)\f$) for most
* molecules in practice.
*/
extern std::pair<Rings, bool> find_all_rings(const Molecule &mol);
extern std::pair<Rings, bool> find_all_rings(const Molecule &mol,
int max_size = -1);

/**
* @brief Find all elementary cycles in the substructure.
* @param sub A substructure.
* @param max_size Maximum size of the rings to be found. If negative (the
*        default), all rings are found.
* @return A pair of (all elementary cycles, success). If success is `false`,
* the vector is in an unspecified state. This will fail if and only if
* any atom is a member of more than 100 elementary cycles.
*
* This is based on the algorithm described in the following paper:
* Hanser, Th. *et al.* *J. Chem. Inf. Comput. Sci.* **1996**, *36* (6),
* 1146-1152. DOI: [10.1021/ci960322f](https://doi.org/10.1021/ci960322f)
*
* The time complexity of this function is inherently exponential, but it is
* expected to run in a reasonable time (\f$\sim\mathcal{O}(V^2)\f$) for most
* molecules in practice.
*/
extern std::pair<Rings, bool> find_all_rings(const Substructure &sub,
int max_size = -1);

/**
* @brief Find all elementary cycles in the substructure (overload taking a
*        ConstSubstructure).
* @param sub A substructure, passed as a ConstSubstructure.
* @param max_size Maximum size of the rings to be found. If negative (the
*        default), all rings are found.
* @return A pair of (all elementary cycles, success). If success is `false`,
* the vector is in an unspecified state. This will fail if and only if
* any atom is a member of more than 100 elementary cycles.
*
* This is based on the algorithm described in the following paper:
* Hanser, Th. *et al.* *J. Chem. Inf. Comput. Sci.* **1996**, *36* (6),
* 1146-1152. DOI: [10.1021/ci960322f](https://doi.org/10.1021/ci960322f)
*
* The time complexity of this function is inherently exponential, but it is
* expected to run in a reasonable time (\f$\sim\mathcal{O}(V^2)\f$) for most
* molecules in practice.
*/
extern std::pair<Rings, bool> find_all_rings(const ConstSubstructure &sub,
int max_size = -1);

namespace internal {
// Forward declaration only: RingSetsFinder holds this type through a
// std::unique_ptr, so the full definition is not needed at this point.
template <class MoleculeLike>
struct FindRingsCommonData;
} // namespace internal

Expand Down Expand Up @@ -65,13 +109,16 @@ namespace internal {
* \mathcal{O}(E)\f$ is size of SSSR. For most molecules, however, this is
* \f$\mathcal{O}(V^3)\f$.
*/
template <class MoleculeLike>
class RingSetsFinder {
public:
/**
* @brief Construct a new Rings Finder object.
* @param mol A molecule.
* @param max_size Maximum size of the rings to be found. If negative, all
* rings are found.
*/
explicit RingSetsFinder(const Molecule &mol);
explicit RingSetsFinder(const MoleculeLike &mol, int max_size = -1);

RingSetsFinder(const RingSetsFinder &) = delete;
RingSetsFinder &operator=(const RingSetsFinder &) = delete;
Expand All @@ -97,13 +144,22 @@ class RingSetsFinder {
Rings find_sssr() const;

private:
const Molecule *mol_;
std::unique_ptr<internal::FindRingsCommonData> data_;
const MoleculeLike *mol_;
std::unique_ptr<internal::FindRingsCommonData<MoleculeLike>> data_;
};

// Deduction guide: lets `RingSetsFinder(x, max_size)` deduce MoleculeLike from
// the type of its first argument, so callers need not spell out the template
// argument.
template <class MoleculeLike>
RingSetsFinder(const MoleculeLike &, int) -> RingSetsFinder<MoleculeLike>;

// Explicit instantiation declarations for the supported molecule-like types;
// they suppress implicit instantiation in translation units including this
// header. NOTE(review): the matching explicit instantiation definitions are
// presumably in the implementation file - confirm.
extern template class RingSetsFinder<Molecule>;
extern template class RingSetsFinder<Substructure>;
extern template class RingSetsFinder<ConstSubstructure>;

/**
* @brief Find the union of all SSSRs in the molecular graph.
* @param mol A molecule.
* @param max_size Maximum size of the rings to be found. If negative, all
* rings are found.
* @return The union of all SSSRs in the molecular graph.
* @sa find_sssr(), nuri::RingSetsFinder::find_relevant_rings()
*
Expand All @@ -113,13 +169,52 @@ class RingSetsFinder {
* @note If both relevant rings and SSSR are needed, it is recommended to use
* the nuri::RingSetsFinder class instead of the free functions.
*/
inline Rings find_relevant_rings(const Molecule &mol) {
return RingSetsFinder(mol).find_relevant_rings();
inline Rings find_relevant_rings(const Molecule &mol, int max_size = -1) {
return RingSetsFinder(mol, max_size).find_relevant_rings();
}

/**
 * @brief Compute the union of all SSSRs of a substructure.
 * @param sub The substructure to search.
 * @param max_size Upper bound on the size of the rings to be found. A negative
 *        value means all rings are found.
 * @return The union of all SSSRs of the substructure.
 * @sa find_sssr(), nuri::RingSetsFinder::find_relevant_rings()
 *
 * Convenience wrapper: constructs a temporary nuri::RingSetsFinder and
 * forwards to its find_relevant_rings() member function.
 *
 * @note When both the relevant rings and an SSSR are needed, prefer using the
 *       nuri::RingSetsFinder class over the free functions.
 */
inline Rings find_relevant_rings(const Substructure &sub, int max_size = -1) {
  Rings relevant = RingSetsFinder(sub, max_size).find_relevant_rings();
  return relevant;
}

/**
 * @brief Compute the union of all SSSRs of a substructure (overload taking a
 *        ConstSubstructure).
 * @param sub The substructure to search.
 * @param max_size Upper bound on the size of the rings to be found. A negative
 *        value means all rings are found.
 * @return The union of all SSSRs of the substructure.
 * @sa find_sssr(), nuri::RingSetsFinder::find_relevant_rings()
 *
 * Convenience wrapper: constructs a temporary nuri::RingSetsFinder and
 * forwards to its find_relevant_rings() member function.
 *
 * @note When both the relevant rings and an SSSR are needed, prefer using the
 *       nuri::RingSetsFinder class over the free functions.
 */
inline Rings find_relevant_rings(const ConstSubstructure &sub,
                                 int max_size = -1) {
  Rings relevant = RingSetsFinder(sub, max_size).find_relevant_rings();
  return relevant;
}

/**
* @brief Find a smallest set of smallest rings (SSSR) of the molecular graph.
* @param mol A molecule.
* @param max_size Maximum size of the rings to be found. If negative, all
* rings are found.
* @return *A* smallest set of smallest rings (SSSR) of the molecular graph.
* @sa find_relevant_rings(), nuri::RingSetsFinder::find_sssr()
* @note This function does not guarantee that the returned set is unique, nor
Expand All @@ -131,8 +226,48 @@ inline Rings find_relevant_rings(const Molecule &mol) {
* @note If both relevant rings and SSSR are needed, it is recommended to use
* the nuri::RingSetsFinder class instead of the free functions.
*/
inline Rings find_sssr(const Molecule &mol) {
return RingSetsFinder(mol).find_sssr();
inline Rings find_sssr(const Molecule &mol, int max_size = -1) {
return RingSetsFinder(mol, max_size).find_sssr();
}

/**
 * @brief Compute a smallest set of smallest rings (SSSR) of a substructure.
 * @param sub The substructure to search.
 * @param max_size Upper bound on the size of the rings to be found. A negative
 *        value means all rings are found.
 * @return *One* smallest set of smallest rings (SSSR) of the substructure.
 * @sa find_relevant_rings(), nuri::RingSetsFinder::find_sssr()
 * @note The returned set is not guaranteed to be unique, and the result is
 *       not guaranteed to be reproducible even for the same input.
 *
 * Convenience wrapper: constructs a temporary nuri::RingSetsFinder and
 * forwards to its find_sssr() member function.
 *
 * @note When both the relevant rings and an SSSR are needed, prefer using the
 *       nuri::RingSetsFinder class over the free functions.
 */
inline Rings find_sssr(const Substructure &sub, int max_size = -1) {
  Rings sssr = RingSetsFinder(sub, max_size).find_sssr();
  return sssr;
}

/**
 * @brief Compute a smallest set of smallest rings (SSSR) of a substructure
 *        (overload taking a ConstSubstructure).
 * @param sub The substructure to search.
 * @param max_size Upper bound on the size of the rings to be found. A negative
 *        value means all rings are found.
 * @return *One* smallest set of smallest rings (SSSR) of the substructure.
 * @sa find_relevant_rings(), nuri::RingSetsFinder::find_sssr()
 * @note The returned set is not guaranteed to be unique, and the result is
 *       not guaranteed to be reproducible even for the same input.
 *
 * Convenience wrapper: constructs a temporary nuri::RingSetsFinder and
 * forwards to its find_sssr() member function.
 *
 * @note When both the relevant rings and an SSSR are needed, prefer using the
 *       nuri::RingSetsFinder class over the free functions.
 */
inline Rings find_sssr(const ConstSubstructure &sub, int max_size = -1) {
  Rings sssr = RingSetsFinder(sub, max_size).find_sssr();
  return sssr;
}
} // namespace nuri

Expand Down
4 changes: 3 additions & 1 deletion include/nuri/core/graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -704,7 +704,7 @@ class Graph {
return find_edge_helper(*this, src, dst);
}

void clear_edge() {
void clear_edges() {
edges_.clear();
for (std::vector<AdjEntry> &adj: adj_list_)
adj.clear();
Expand Down Expand Up @@ -1534,6 +1534,8 @@ namespace internal {
: src_(other.src_), dst_(other.dst_), eid_(other.eid_),
subgraph_(other.subgraph_) { }

constexpr auto id() const noexcept { return eid_; }

constexpr auto src() const noexcept { return subgraph_->node(src_); }
constexpr auto dst() const noexcept { return subgraph_->node(dst_); }

Expand Down
Loading

0 comments on commit 454f8fb

Please sign in to comment.