Skip to content

Commit

Permalink
feat(fmt/mmcif): save entity_id property to residues and chains (#454)
Browse files Browse the repository at this point in the history
  • Loading branch information
jnooree authored Feb 19, 2025
1 parent 2ca1d22 commit 48db8a9
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 14 deletions.
26 changes: 18 additions & 8 deletions src/fmt/fmt_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#ifndef NURI_FMT_FMT_INTERNAL_H_
#define NURI_FMT_FMT_INTERNAL_H_

#include <cstddef>
#include <functional>
#include <iterator>
#include <string>
Expand Down Expand Up @@ -97,15 +98,14 @@ void pdb_update_confs(Molecule &mol, const std::vector<AT> &atom_data,
// Index into a substructure's property list at which the "chain" entry is
// expected; pdb_update_substructs() debug-checks that
// props()[kChainIdx].first == "chain".
constexpr int kChainIdx = 0;

template <class RT, class AT, class ChainAsSv, class ResidueMember,
class SeqMember, class ICodeAsSv,
class SeqMember, class ICodeAsSv, class EntityMember = std::nullptr_t,
std::enable_if_t<!std::is_reference_v<RT>, int> = 0>
// NOLINTNEXTLINE(*-missing-std-forward)
void pdb_update_substructs(Molecule &mol, RT &&residues,
const std::vector<AT> &atoms,
const ChainAsSv &chain_sv,
const ResidueMember &residue_member,
const SeqMember &seq_member,
const ICodeAsSv &icode_sv) {
void pdb_update_substructs(
// NOLINTNEXTLINE(*-missing-std-forward)
Molecule &mol, RT &&residues, const std::vector<AT> &atoms,
const ChainAsSv &chain_sv, const ResidueMember &residue_member,
const SeqMember &seq_member, const ICodeAsSv &icode_sv,
const EntityMember &eid_member = nullptr) {
auto &subs = mol.substructures();

std::vector<std::pair<std::string_view, std::vector<int>>> chains;
Expand Down Expand Up @@ -138,6 +138,11 @@ void pdb_update_substructs(Molecule &mol, RT &&residues,
if (!icode.empty())
sit->add_prop("icode", icode);

if constexpr (!std::is_same_v<EntityMember, std::nullptr_t>) {
sit->add_prop("entity_id",
std::invoke(eid_member, atoms[sit->atom_ids()[0]]));
}

ABSL_DCHECK(sit->props()[kChainIdx].first == "chain");
}

Expand All @@ -146,6 +151,11 @@ void pdb_update_substructs(Molecule &mol, RT &&residues,
sit->update(std::move(chain.second), {});
sit->name() = chain.first;
sit->set_id(i);

if constexpr (!std::is_same_v<EntityMember, std::nullptr_t>) {
sit->add_prop("entity_id",
std::invoke(eid_member, atoms[sit->atom_ids()[0]]));
}
}
}
} // namespace internal
Expand Down
27 changes: 21 additions & 6 deletions src/fmt/mmcif.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,8 @@ std::string as_key(std::string_view prefix, std::string_view alt_id) {

class MmcifAtomData {
public:
explicit MmcifAtomData(MmcifAtomInfo first): data_ { first } { }
// Builds the atom record from its first info entry and the entity ID that
// entry was read with. The entity ID is kept as a std::string_view, i.e. the
// characters are not copied.
// NOTE(review): presumably the view points into the parsed CIF block's
// storage -- confirm that storage outlives this object.
explicit MmcifAtomData(MmcifAtomInfo first, std::string_view entity_id)
: data_ { first }, entity_id_(entity_id) { }

bool add_info(MmcifAtomInfo info) {
ABSL_DCHECK(static_cast<bool>(info));
Expand Down Expand Up @@ -422,8 +423,11 @@ class MmcifAtomData {
return static_cast<int>(it - data_.begin());
}

// Returns the entity ID supplied at construction as a non-owning view.
// NOTE(review): validity depends on the lifetime of whatever buffer the
// constructor argument referred to -- verify against the caller.
std::string_view entity_id() const { return entity_id_; }

private:
std::vector<MmcifAtomInfo> data_;
std::string_view entity_id_;
};

struct MmcifResidueInfo {
Expand Down Expand Up @@ -611,7 +615,7 @@ class MmcifModelData {
explicit MmcifModelData(int model_num): model_num_(model_num) { }

void add_atom(MmcifAtomInfo info, std::string_view comp_id,
const internal::CifValue &id) {
std::string_view entity_id, const internal::CifValue &id) {
ABSL_DCHECK(static_cast<bool>(info));

int ri = residues_.prepare_add_atom(info, comp_id, id);
Expand All @@ -622,6 +626,14 @@ class MmcifModelData {
const int ai = it->second;

if (!first) {
if (entity_id != atoms_[ai].entity_id()) {
ABSL_LOG(WARNING)
<< "Entity ID mismatch: " << entity_id << " vs "
<< atoms_[ai].entity_id() << "; "
<< "ignoring atom with serial number " << info.id().atom_id;
return;
}

bool new_altloc = atoms_[ai].add_info(info);
ABSL_LOG_IF(WARNING, !new_altloc)
<< "Duplicate atom " << info.id().atom_id << " of residue "
Expand All @@ -630,7 +642,7 @@ class MmcifModelData {
return;
}

atoms_.push_back(MmcifAtomData(info));
atoms_.push_back(MmcifAtomData(info, entity_id));
residues_.add_atom_at(ri, ai);
}

Expand Down Expand Up @@ -660,7 +672,8 @@ class MmcifModelData {
internal::pdb_update_substructs(mol, std::move(residues_), atoms_,
&ResidueId::asym_id,
&MmcifResidueInfo::comp_id,
&ResidueId::seq_id, &ResidueId::ins_code);
&ResidueId::seq_id, &ResidueId::ins_code,
&MmcifAtomData::entity_id);
return mol;
}

Expand Down Expand Up @@ -701,7 +714,9 @@ std::vector<Molecule> mmcif_read_next_block(CifParser &parser) {
alt_id = NullableCifColumn::from_key(
block.data(), "_atom_site.label_alt_id"),
type_symbol = NullableCifColumn::from_key(
block.data(), "_atom_site.type_symbol");
block.data(), "_atom_site.type_symbol"),
entity_id = NullableCifColumn::from_key(
block.data(), "_atom_site.label_entity_id");

TypedNullableColumn<absl::SimpleAtof, false> occupancy =
NullableCifColumn::from_key(block.data(), "_atom_site.occupancy");
Expand Down Expand Up @@ -741,7 +756,7 @@ std::vector<Molecule> mmcif_read_next_block(CifParser &parser) {
if (first)
models.push_back(MmcifModelData(mid));

models[it->second].add_atom(info, *comp_id[i], id);
models[it->second].add_atom(info, *comp_id[i], *entity_id[i], id);
}

StructConnIndexer ptnr1(block.data(), res_idx, 0),
Expand Down

0 comments on commit 48db8a9

Please sign in to comment.